Files updated, added and removed in order to turn the ERASER branch into HEAD


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1086 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 60553dd..96911ed 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index 994cdb7..5972dfa 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ALL THIS CODE RUNS ON THE SIMULATED CPU.
@@ -257,6 +257,12 @@
    return 0;
 }
 
+int pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate)
+{
+   *detachstate = attr->__detachstate;
+   return 0;
+}
+
 int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit)
 {
    static int moans = N_MOANS;
@@ -1044,6 +1050,7 @@
 void __my_pthread_testcancel(void)
 {
    int res;
+   ensure_valgrind("__my_pthread_testcancel");
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__TESTCANCEL,
                            0, 0, 0, 0);
@@ -1178,7 +1185,7 @@
       if (n_now != n_orig) break;
 
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
+      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -1381,13 +1388,14 @@
 /* Relies on assumption that initial private data is NULL.  This
    should be fixed somehow. */
 
-/* The allowable keys (indices) (all 2 of them). 
+/* The allowable keys (indices) (all 3 of them). 
    From sysdeps/pthread/bits/libc-tsd.h
 */
-#define N_LIBC_TSD_EXTRA_KEYS 1
+#define N_LIBC_TSD_EXTRA_KEYS 0
 
 enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
                         _LIBC_TSD_KEY_DL_ERROR,
+                        _LIBC_TSD_KEY_RPC_VARS,
                         _LIBC_TSD_KEY_N };
 
 /* Auto-initialising subsystem.  libc_specifics_inited is set 
@@ -1877,6 +1885,10 @@
 }
 
 
+pid_t __vfork(void)
+{
+   return __fork();
+}
 
 
 /* ---------------------------------------------------------------------
@@ -1965,7 +1977,7 @@
    Basic idea is: modify the timeout parameter to select so that it
    returns immediately.  Poll like this until select returns non-zero,
    indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 20 milliseconds, which
+   Space out the polls with nanosleeps of say 11 milliseconds, which
    is required to be nonblocking; this allows other threads to run.  
 
    Assumes:
@@ -2083,7 +2095,7 @@
       /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */
+      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       res = my_do_syscall2(__NR_nanosleep, 
@@ -2193,7 +2205,7 @@
       /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */
+      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -2810,6 +2822,7 @@
 weak_alias (__pread64, pread64)
 weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
+weak_alias(__vfork, vfork)
 
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
diff --git a/coregrind/arch/x86-linux/vg_libpthread_unimp.c b/coregrind/arch/x86-linux/vg_libpthread_unimp.c
index f413887..f3938ec 100644
--- a/coregrind/arch/x86-linux/vg_libpthread_unimp.c
+++ b/coregrind/arch/x86-linux/vg_libpthread_unimp.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ---------------------------------------------------------------------
@@ -82,7 +82,7 @@
 //void longjmp ( void )  { unimp("longjmp"); }
 //void pthread_atfork ( void )  { unimp("pthread_atfork"); }
 //void pthread_attr_destroy ( void )  { unimp("pthread_attr_destroy"); }
-void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
+//void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
 void pthread_attr_getinheritsched ( void )  { unimp("pthread_attr_getinheritsched"); }
 //void pthread_attr_getschedparam ( void )  { unimp("pthread_attr_getschedparam"); }
 //void pthread_attr_getschedpolicy ( void )  { unimp("pthread_attr_getschedpolicy"); }
diff --git a/coregrind/arch/x86-linux/vg_syscall.S b/coregrind/arch/x86-linux/vg_syscall.S
index adabbed..52d6091 100644
--- a/coregrind/arch/x86-linux/vg_syscall.S
+++ b/coregrind/arch/x86-linux/vg_syscall.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/demangle/cp-demangle.c b/coregrind/demangle/cp-demangle.c
index 5cf99c8..8d91d29 100644
--- a/coregrind/demangle/cp-demangle.c
+++ b/coregrind/demangle/cp-demangle.c
@@ -46,9 +46,9 @@
 #include "demangle.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation,
@@ -1406,7 +1406,7 @@
   }
 
   if (base == 36) {
-     *value = VG_(atoll36) (dyn_string_buf (number));
+     *value = VG_(atoll36) (36, dyn_string_buf (number));
   } else {
      *value = VG_(atoll) (dyn_string_buf (number));
   }
diff --git a/coregrind/demangle/cplus-dem.c b/coregrind/demangle/cplus-dem.c
index 56c3261..959dbd3 100644
--- a/coregrind/demangle/cplus-dem.c
+++ b/coregrind/demangle/cplus-dem.c
@@ -70,17 +70,18 @@
 #endif
 
 #ifndef STANDALONE
-#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr)
-#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr)
-#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size)
-#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size)
+#define xstrdup(ptr)        VG_(arena_strdup) (VG_AR_DEMANGLE, ptr)
+#define free(ptr)           VG_(arena_free)   (VG_AR_DEMANGLE, ptr)
+#define xmalloc(size)       VG_(arena_malloc) (VG_AR_DEMANGLE, size)
+#define xrealloc(ptr, size) VG_(arena_realloc)(VG_AR_DEMANGLE, ptr, \
+                                               /*align*/4, size)
 #define abort() vg_assert(0)
 #undef strstr
-#define strstr VG_(strstr)
+#define strstr  VG_(strstr)
 #define sprintf VG_(sprintf)
 #define strncpy VG_(strncpy)
 #define strncat VG_(strncat)
-#define strchr VG_(strchr)
+#define strchr  VG_(strchr)
 #define strpbrk VG_(strpbrk)
 #endif
 
diff --git a/coregrind/demangle/dyn-string.c b/coregrind/demangle/dyn-string.c
index aaa7e36..d6130a1 100644
--- a/coregrind/demangle/dyn-string.c
+++ b/coregrind/demangle/dyn-string.c
@@ -36,9 +36,9 @@
 #include "dyn-string.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If this file is being compiled for inclusion in the C++ runtime
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html
index b715ee3..95fe840 100644
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -345,7 +345,7 @@
 </pre>
 
 <p>Note that Valgrind also reads options from the environment variable
-<code>$VALGRIND</code>, and processes them before the command-line
+<code>$VALGRIND_OPTS</code>, and processes them before the command-line
 options.
 
 <p>Valgrind's default settings succeed in giving reasonable behaviour
@@ -838,8 +838,8 @@
   <li>The contents of malloc'd blocks, before you write something
       there.  In C++, the new operator is a wrapper round malloc, so
       if you create an object with new, its fields will be
-      uninitialised until you fill them in, which is only Right and
-      Proper.</li>
+      uninitialised until you (or the constructor) fill them in, which
+      is only Right and Proper.</li>
 </ul>
 
 
@@ -1066,16 +1066,16 @@
       <p>
 
   <li>The "immediate location" specification.  For Value and Addr
-      errors, is either the name of the function in which the error
-      occurred, or, failing that, the full path the the .so file
-      containing the error location.  For Param errors, is the name of
-      the offending system call parameter.  For Free errors, is the
-      name of the function doing the freeing (eg, <code>free</code>,
-      <code>__builtin_vec_delete</code>, etc)</li><br>
+      errors, it is either the name of the function in which the error
+      occurred, or, failing that, the full path of the .so file or
+      executable containing the error location.  For Param errors,
+      is the name of the offending system call parameter.  For Free
+      errors, is the name of the function doing the freeing (eg,
+      <code>free</code>, <code>__builtin_vec_delete</code>, etc)</li><br>
       <p>
 
   <li>The caller of the above "immediate location".  Again, either a
-      function or shared-object name.</li><br>
+      function or shared-object/executable name.</li><br>
       <p>
 
   <li>Optionally, one or two extra calling-function or object names,
@@ -1083,8 +1083,8 @@
 </ul>
 
 <p>
-Locations may be either names of shared objects or wildcards matching
-function names.  They begin <code>obj:</code> and <code>fun:</code>
+Locations may be either names of shared objects/executables or wildcards
+matching function names.  They begin <code>obj:</code> and <code>fun:</code>
 respectively.  Function and object names to match against may use the 
 wildcard characters <code>*</code> and <code>?</code>.
 
@@ -1617,11 +1617,11 @@
 
   <li>If the new size is smaller, the dropped-off section is marked as
       unaddressible.  You may only pass to realloc a pointer
-      previously issued to you by malloc/calloc/new/realloc.</li><br>
+      previously issued to you by malloc/calloc/realloc.</li><br>
       <p>
 
   <li>free/delete: you may only pass to free a pointer previously
-      issued to you by malloc/calloc/new/realloc, or the value
+      issued to you by malloc/calloc/realloc, or the value
       NULL. Otherwise, Valgrind complains.  If the pointer is indeed
       valid, Valgrind marks the entire area it points at as
       unaddressible, and places the block in the freed-blocks-queue.
@@ -2058,7 +2058,9 @@
   <li>Run your program with <code>cachegrind</code> in front of the
       normal command line invocation.  When the program finishes,
       Valgrind will print summary cache statistics. It also collects
-      line-by-line information in a file <code>cachegrind.out</code>.
+      line-by-line information in a file
+      <code>cachegrind.out.<i>pid</i></code>, where <code><i>pid</i></code>
+      is the program's process id.
       <p>
       This step should be done every time you want to collect
       information about a new program, a changed program, or about the
@@ -2197,15 +2199,17 @@
 
 As well as printing summary information, Cachegrind also writes
 line-by-line cache profiling information to a file named
-<code>cachegrind.out</code>.  This file is human-readable, but is best
-interpreted by the accompanying program <code>vg_annotate</code>,
+<code>cachegrind.out.<i>pid</i></code>.  This file is human-readable, but is
+best interpreted by the accompanying program <code>vg_annotate</code>,
 described in the next section.
 <p>
-Things to note about the <code>cachegrind.out</code> file:
+Things to note about the <code>cachegrind.out.<i>pid</i></code> file:
 <ul>
   <li>It is written every time <code>valgrind --cachesim=yes</code> or
       <code>cachegrind</code> is run, and will overwrite any existing
-      <code>cachegrind.out</code> in the current directory.</li>
+      <code>cachegrind.out.<i>pid</i></code> in the current directory (but
+      that won't happen very often because it takes some time for process ids
+      to be recycled).</li>
   <p>
   <li>It can be huge: <code>ls -l</code> generates a file of about
       350KB.  Browsing a few files and web pages with a Konqueror
@@ -2213,6 +2217,13 @@
       of around 15 MB.</li>
 </ul>
 
+Note that older versions of Cachegrind used a log file named
+<code>cachegrind.out</code> (i.e. no <code><i>.pid</i></code> suffix).
+The suffix serves two purposes.  Firstly, it means you don't have to rename old
+log files that you don't want to overwrite.  Secondly, and more importantly,
+it allows correct profiling with the <code>--trace-children=yes</code> option
+of programs that spawn child processes.
+
 <a name="profileflags"></a>
 <h3>7.5&nbsp; Cachegrind options</h3>
 Cachegrind accepts all the options that Valgrind does, although some of them
@@ -2245,9 +2256,13 @@
 window to be at least 120-characters wide if possible, as the output
 lines can be quite long.
 <p>
-To get a function-by-function summary, run <code>vg_annotate</code> in
-directory containing a <code>cachegrind.out</code> file.  The output
-looks like this:
+To get a function-by-function summary, run <code>vg_annotate
+--<i>pid</i></code> in a directory containing a
+<code>cachegrind.out.<i>pid</i></code> file.  The <code>--<i>pid</i></code>
+is required so that <code>vg_annotate</code> knows which log file to use when
+several are present.
+<p>
+The output looks like this:
 
 <pre>
 --------------------------------------------------------------------------------
@@ -2468,8 +2483,9 @@
 specific enough.
 
 Beware that vg_annotate can take some time to digest large
-<code>cachegrind.out</code> files, eg. 30 seconds or more.  Also beware that
-auto-annotation can produce a lot of output if your program is large!
+<code>cachegrind.out.<i>pid</i></code> files, e.g. 30 seconds or more.  Also
+beware that auto-annotation can produce a lot of output if your program is
+large!
 
 
 <h3>7.7&nbsp; Annotating assembler programs</h3>
@@ -2492,13 +2508,18 @@
 
 <h3>7.8&nbsp; <code>vg_annotate</code> options</h3>
 <ul>
+  <li><code>--<i>pid</i></code></li><p>
+
+      Indicates which <code>cachegrind.out.<i>pid</i></code> file to read.
+      Not actually an option -- it is required.
+    
   <li><code>-h, --help</code></li><p>
   <li><code>-v, --version</code><p>
 
       Help and version, as usual.</li>
 
   <li><code>--sort=A,B,C</code> [default: order in 
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies the events upon which the sorting of the function-by-function
       entries will be based.  Useful if you want to concentrate on eg. I cache
       misses (<code>--sort=I1mr,I2mr</code>), or D cache misses
@@ -2506,10 +2527,10 @@
       (<code>--sort=D2mr,I2mr</code>).</li><p>
 
   <li><code>--show=A,B,C</code> [default: all, using order in
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies which events to show (and the column order). Default is to use
-      all present in the <code>cachegrind.out</code> file (and use the order in
-      the file).</li><p>
+      all present in the <code>cachegrind.out.<i>pid</i></code> file (and use
+      the order in the file).</li><p>
 
   <li><code>--threshold=X</code> [default: 99%] <p>
       Sets the threshold for the function-by-function summary.  Functions are
@@ -2547,17 +2568,18 @@
 There are a couple of situations in which vg_annotate issues warnings.
 
 <ul>
-  <li>If a source file is more recent than the <code>cachegrind.out</code>
-      file.  This is because the information in <code>cachegrind.out</code> is
-      only recorded with line numbers, so if the line numbers change at all in
-      the source (eg. lines added, deleted, swapped), any annotations will be 
+  <li>If a source file is more recent than the
+      <code>cachegrind.out.<i>pid</i></code> file.  This is because the
+      information in <code>cachegrind.out.<i>pid</i></code> is only recorded
+      with line numbers, so if the line numbers change at all in the source
+      (eg.  lines added, deleted, swapped), any annotations will be
       incorrect.<p>
 
   <li>If information is recorded about line numbers past the end of a file.
       This can be caused by the above problem, ie. shortening the source file
-      while using an old <code>cachegrind.out</code> file.  If this happens,
-      the figures for the bogus lines are printed anyway (clearly marked as
-      bogus) in case they are important.</li><p>
+      while using an old <code>cachegrind.out.<i>pid</i></code> file.  If this
+      happens, the figures for the bogus lines are printed anyway (clearly
+      marked as bogus) in case they are important.</li><p>
 </ul>
 
 
@@ -2677,6 +2699,13 @@
       <blockquote><code>btsl %eax, %edx</code></blockquote>
 
       This should only happen rarely.
+      </li><p>
+
+  <li>FPU instructions with data sizes of 28 and 108 bytes (e.g.
+      <code>fsave</code>) are treated as though they only access 16 bytes.
+      These instructions seem to be rare so hopefully this won't affect
+      accuracy much.
+      </li><p>
 </ul>
 
 Another thing worth noting is that results are very sensitive.  Changing the
diff --git a/coregrind/valgrind.in b/coregrind/valgrind.in
index 7b99277..4fee909 100755
--- a/coregrind/valgrind.in
+++ b/coregrind/valgrind.in
@@ -1,11 +1,37 @@
 #!/bin/sh
+##--------------------------------------------------------------------##
+##--- The startup script.                                 valgrind ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Valgrind, an x86 protected-mode emulator 
+#  designed for debugging and profiling binaries on x86-Unixes.
+#
+#  Copyright (C) 2002 Julian Seward
+#     jseward@acm.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
 
 # Should point to the installation directory
 prefix="@prefix@"
 exec_prefix="@exec_prefix@"
 VALGRIND="@libdir@/valgrind"
 
-
 # Other stuff ...
 version="@VERSION@"
 emailto="jseward@acm.org"
@@ -13,168 +39,57 @@
 # The default name of the suppressions file
 vgsupp="--suppressions=$VALGRIND/default.supp"
 
-# name we were invoked with
-vgname=`echo $0 | sed 's,^.*/,,'`
-
 # Valgrind options
 vgopts=
 
-# Prog and arg to run
-argopts=
+# --skin=<foo> arg, specifying skin used
+skin_arg=
 
-# Show usage info?
-dousage=0
-
-# show version info?
-doversion=0
-
-# Collect up args for Valgrind
+# Collect up args for Valgrind.  Only --version and --skin are intercepted 
+# here;  the rest are passed to vg_main.c.
 while [ $# != 0 ]
 do
   arg=$1
   case "$arg" in
-#   options for the user
-    --help)                 dousage=1; break;;
-    --version)              doversion=1; break;;
-    --logfile-fd=*)         vgopts="$vgopts $arg"; shift;;
-    -v)                     vgopts="$vgopts $arg"; shift;;
-    --verbose)              vgopts="$vgopts -v"; shift;;
-    -q)                     vgopts="$vgopts $arg"; shift;;
-    --quiet)                vgopts="$vgopts $arg"; shift;;
-    --error-limit=no)       vgopts="$vgopts $arg"; shift;;
-    --error-limit=yes)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=no)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=yes)     vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=no)        vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=yes)       vgopts="$vgopts $arg"; shift;;
-    --demangle=no)          vgopts="$vgopts $arg"; shift;;
-    --demangle=yes)         vgopts="$vgopts $arg"; shift;;
-    --num-callers=*)        vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=no)  vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;;
-    --leak-check=no)        vgopts="$vgopts $arg"; shift;;
-    --leak-check=yes)       vgopts="$vgopts $arg"; shift;;
-    --show-reachable=no)    vgopts="$vgopts $arg"; shift;;
-    --show-reachable=yes)   vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=low)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=med)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=high) vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=no)     vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=yes)    vgopts="$vgopts $arg"; shift;;
-    --alignment=*)          vgopts="$vgopts $arg"; shift;;
-    --trace-children=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-children=yes)   vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=no)    vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=yes)   vgopts="$vgopts $arg"; shift;;
-    --freelist-vol=*)       vgopts="$vgopts $arg"; shift;;
-    --suppressions=*)       vgopts="$vgopts $arg"; shift;;
-    --cachesim=yes)         vgopts="$vgopts $arg"; shift;;
-    --cachesim=no)          vgopts="$vgopts $arg"; shift;;
-    --I1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --D1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --L2=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --weird-hacks=*)        vgopts="$vgopts $arg"; shift;;
-#   options for debugging Valgrind
-    --sanity-level=*)       vgopts="$vgopts $arg"; shift;;
-    --single-step=yes)      vgopts="$vgopts $arg"; shift;;
-    --single-step=no)       vgopts="$vgopts $arg"; shift;;
-    --optimise=yes)         vgopts="$vgopts $arg"; shift;;
-    --optimise=no)          vgopts="$vgopts $arg"; shift;;
-    --instrument=yes)       vgopts="$vgopts $arg"; shift;;
-    --instrument=no)        vgopts="$vgopts $arg"; shift;;
-    --cleanup=yes)          vgopts="$vgopts $arg"; shift;;
-    --cleanup=no)           vgopts="$vgopts $arg"; shift;;
-    --smc-check=none)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=some)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=all)        vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=yes)   vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=yes)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=no)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=yes)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=no)       vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=none)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=some)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=all)    vgopts="$vgopts $arg"; shift;;
-    --stop-after=*)         vgopts="$vgopts $arg"; shift;;
-    --dump-error=*)         vgopts="$vgopts $arg"; shift;;
-    -*)                     dousage=1; break;;
+    --version)              echo "valgrind-$version"; exit 1 ;;
+    --skin=*)               skin_arg=$arg;            shift;;
+    -*)                     vgopts="$vgopts $arg";    shift;;
     *)                      break;;
   esac
 done
 
-if [ z"$doversion" = z1 ]; then
-   echo "valgrind-$version"
+
+# Decide on the skin.  Default to memory checking if not specified.
+if [ z"$skin_arg" = z ]; then
+   skin=memcheck
+else
+   # Hack off the "--skin=" prefix.
+   skin=`echo $skin_arg | sed 's/--skin=//'`
+fi
+
+# Setup skin shared object.
+skin_so="vgskin_${skin}.so"
+if [ ! -r $VALGRIND/$skin_so ] ; then
+   echo
+   echo "Extension error:"
+   echo "  The shared library \`$skin_so' for the chosen"
+   echo "  skin \`$skin' could not be found in"
+   echo "  $VALGRIND"
+   echo
    exit 1
 fi
 
-if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
-   echo
-   echo "usage: $vgname [options] prog-and-args"
-   echo
-   echo "  options for the user, with defaults in [ ], are:"
-   echo "    --help                    show this message"
-   echo "    --version                 show version"
-   echo "    -q --quiet                run silently; only print error msgs"
-   echo "    -v --verbose              be more verbose, incl counts of errors"
-   echo "    --gdb-attach=no|yes       start GDB when errors detected? [no]"
-   echo "    --demangle=no|yes         automatically demangle C++ names? [yes]"
-   echo "    --num-callers=<number>    show <num> callers in stack traces [4]"
-   echo "    --error-limit=no|yes      stop showing new errors if too many? [yes]"
-   echo "    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]"
-   echo "    --leak-check=no|yes       search for memory leaks at exit? [no]"
-   echo "    --leak-resolution=low|med|high"
-   echo "                              amount of bt merging in leak check [low]"
-   echo "    --show-reachable=no|yes   show reachable blocks in leak check? [no]"
-   echo "    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]"
-   echo "    --alignment=<number>      set minimum alignment of allocations [4]"
-   echo "    --trace-children=no|yes   Valgrind-ise child processes? [no]"
-   echo "    --logfile-fd=<number>     file descriptor for messages [2=stderr]"
-   echo "    --freelist-vol=<number>   volume of freed blocks queue [1000000]"
-   echo "    --workaround-gcc296-bugs=no|yes  self explanatory [no]"
-   echo "    --suppressions=<filename> suppress errors described in"
-   echo "                              suppressions file <filename>"
-   echo "    --check-addrVs=no|yes     experimental lighterweight checking? [yes]"
-   echo "                              yes == Valgrind's original behaviour"
-   echo "    --cachesim=no|yes         do cache profiling? [no]"
-   echo "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually"
-   echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
-   echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
-   echo "    --weird-hacks=hack1,hack2,...  [no hacks selected]"
-   echo "         recognised hacks are: ioctl-VTIME truncate-writes"
-   echo ""
-   echo
-   echo "  options for debugging Valgrind itself are:"
-   echo "    --sanity-level=<number>   level of sanity checking to do [1]"
-   echo "    --single-step=no|yes      translate each instr separately? [no]"
-   echo "    --optimise=no|yes         improve intermediate code? [yes]"
-   echo "    --instrument=no|yes       actually do memory checks? [yes]"
-   echo "    --cleanup=no|yes          improve after instrumentation? [yes]"
-   echo "    --smc-check=none|some|all check writes for s-m-c? [some]"
-   echo "    --trace-syscalls=no|yes   show all system calls? [no]"
-   echo "    --trace-signals=no|yes    show signal handling details? [no]"
-   echo "    --trace-symtab=no|yes     show symbol table details? [no]"
-   echo "    --trace-malloc=no|yes     show client malloc details? [no]"
-   echo "    --trace-sched=no|yes      show thread scheduler details? [no]"
-   echo "    --trace-pthread=none|some|all  show pthread event details? [no]"
-   echo "    --stop-after=<number>     switch to real CPU after executing"
-   echo "                              <number> basic blocks [infinity]"
-   echo "    --dump-error=<number>     show translation for basic block"
-   echo "                              associated with <number>'th"
-   echo "                              error context [0=don't show any]"
-   echo
-   echo "  Extra options are read from env variable \$VALGRIND_OPTS"
-   echo
-   echo "  Valgrind is Copyright (C) 2000-2002 Julian Seward"
-   echo "  and licensed under the GNU General Public License, version 2."
-   echo "  Bug reports, feedback, admiration, abuse, etc, to: $emailto."
-   echo
-   exit 1
+VG_CMD="$@"
+VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
+
+# If no command given, act like -h was given so vg_main.c prints out
+# the usage string.  And set VG_CMD to be any program, doesn't matter which
+# because it won't be run anyway (we use 'true').
+if [ z"$VG_CMD" = z ] ; then
+   VG_ARGS="$VG_ARGS -h"
+   VG_CMD=true
 fi
+export VG_ARGS
 
 # A bit subtle.  The LD_PRELOAD added entry must be absolute
 # and not depend on LD_LIBRARY_PATH.  This is so that we can
@@ -182,13 +97,19 @@
 # libpthread.so fall out of visibility, independently of
 # whether valgrind.so is visible.
 
-VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
-export VG_ARGS
 LD_LIBRARY_PATH=$VALGRIND:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH
-LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD
+
+# Insert skin .so before valgrind.so to override template functions.
+LD_PRELOAD=$VALGRIND/$skin_so:$VALGRIND/valgrind.so:$LD_PRELOAD
 export LD_PRELOAD
 #LD_DEBUG=files
 #LD_DEBUG=symbols
 #export LD_DEBUG
-exec "$@"
+
+exec $VG_CMD
+
+##--------------------------------------------------------------------##
+##--- end                                                 valgrind ---##
+##--------------------------------------------------------------------##
+
diff --git a/coregrind/vg_clientfuncs.c b/coregrind/vg_clientfuncs.c
index c71b6db..b37059b 100644
--- a/coregrind/vg_clientfuncs.c
+++ b/coregrind/vg_clientfuncs.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 #include "valgrind.h"   /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -72,7 +71,7 @@
    the real one, this is because the dynamic linker is running the
    static initialisers for C++, before starting up Valgrind itself.
    In this case it is safe to route calls through to
-   VG_(malloc)/vg_free, since that is self-initialising.
+   VG_(arena_malloc)/VG_(arena_free), since they are self-initialising.
 
    Once Valgrind is initialised, vg_running_on_simd_CPU becomes True.
    The call needs to be transferred from the simulated CPU back to the
@@ -91,15 +90,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to malloc()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to malloc()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__MALLOC, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -116,15 +116,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -147,15 +148,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_vec_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_vec_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -179,7 +181,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__FREE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);      
+      VG_(arena_free)(VG_AR_CLIENT, p);      
    }
 }
 
@@ -193,7 +195,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -213,7 +215,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -232,13 +234,14 @@
                   (UInt)VG_(running_on_simd_CPU), nmemb, size );
    if (nmemb < 0 || size < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
-                               nmemb, size );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
+                                  nmemb, size );
    } else {
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__CALLOC, nmemb, size);
       } else {
-         v = VG_(calloc)(VG_AR_CLIENT, nmemb, size);
+         v = VG_(arena_calloc)(VG_AR_CLIENT, nmemb, size);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -269,7 +272,7 @@
    if (VG_(running_on_simd_CPU)) {
       v = (void*)SIMPLE_REQUEST2(VG_USERREQ__REALLOC, ptrV, new_size);
    } else {
-      v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size);
+      v = VG_(arena_realloc)(VG_AR_CLIENT, ptrV, /*alignment*/4, new_size);
    }
    if (VG_(clo_trace_malloc)) 
       VG_(printf)(" = %p\n", v );
@@ -292,7 +295,7 @@
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__MEMALIGN, alignment, n);
       } else {
-         v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n);
+         v = VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -579,7 +582,7 @@
 {
    int res;
    extern void __libc_freeres(void);
-   __libc_freeres();
+   //__libc_freeres();
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__LIBC_FREERES_DONE, 0, 0, 0, 0);
    /*NOTREACHED*/
diff --git a/coregrind/vg_clientmalloc.c b/coregrind/vg_clientmalloc.c
index 0292aa4..0959843 100644
--- a/coregrind/vg_clientmalloc.c
+++ b/coregrind/vg_clientmalloc.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -38,15 +38,9 @@
 
 /* #define DEBUG_CLIENTMALLOC */
 
-/* Holds malloc'd but not freed blocks. */
+/* Holds malloc'd but not freed blocks.  Static, so zero-inited by default. */
 #define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS)
 static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS];
-static Bool         vg_client_malloc_init_done = False;
-
-/* Holds blocks after freeing. */
-static ShadowChunk* vg_freed_list_start   = NULL;
-static ShadowChunk* vg_freed_list_end     = NULL;
-static Int          vg_freed_list_volume  = 0;
 
 /* Stats ... */
 static UInt         vg_cmalloc_n_mallocs  = 0;
@@ -61,6 +55,105 @@
 /*--- Fns                                                  ---*/
 /*------------------------------------------------------------*/
 
+static __inline__
+Bool needs_shadow_chunks ( void )
+{
+   return VG_(needs).core_errors             ||
+          VG_(needs).alternative_free        ||
+          VG_(needs).sizeof_shadow_block > 0 ||
+          VG_(track_events).bad_free         ||
+          VG_(track_events).mismatched_free  ||
+          VG_(track_events).copy_mem_heap    ||
+          VG_(track_events).die_mem_heap;
+}
+
+#ifdef DEBUG_CLIENTMALLOC
+static 
+Int count_malloclists ( void )
+{
+   ShadowChunk* sc;
+   UInt ml_no;
+   Int  n = 0;
+
+   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
+      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
+         n++;
+   return n;
+}
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks, etc                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate a user-chunk of size bytes.  Also allocate its shadow
+   block, make the shadow block point at the user block.  Put the
+   shadow chunk on the appropriate list, and set all memory
+   protections correctly. */
+static void addShadowChunk ( ThreadState* tst,
+                             Addr p, UInt size, VgAllocKind kind )
+{
+   ShadowChunk* sc;
+   UInt         ml_no = VG_MALLOCLIST_NO(p);
+
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] addShadowChunk "
+               "( sz %d, addr %p, list %d )\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, p, ml_no );
+#  endif
+
+   sc = VG_(arena_malloc)(VG_AR_CORE, 
+                          sizeof(ShadowChunk)
+                           + VG_(needs).sizeof_shadow_block);
+   sc->size      = size;
+   sc->allockind = kind;
+   sc->data      = p;
+   /* Fill in any skin-specific shadow chunk stuff */
+   if (VG_(needs).sizeof_shadow_block > 0)
+      SK_(complete_shadow_chunk) ( sc, tst );
+
+   sc->next  = vg_malloclist[ml_no];
+   vg_malloclist[ml_no] = sc;
+}
+
+/* Get the sc, and return the address of the previous node's next pointer
+   which allows sc to be removed from the list later without having to look
+   it up again.  */
+static ShadowChunk* getShadowChunk ( Addr a, /*OUT*/ShadowChunk*** next_ptr )
+{
+   ShadowChunk *prev, *curr;
+   Int ml_no;
+   
+   ml_no = VG_MALLOCLIST_NO(a);
+
+   prev = NULL;
+   curr = vg_malloclist[ml_no];
+   while (True) {
+      if (curr == NULL) 
+         break;
+      if (a == curr->data)
+         break;
+      prev = curr;
+      curr = curr->next;
+   }
+
+   if (NULL == prev)
+      *next_ptr = &vg_malloclist[ml_no];
+   else
+      *next_ptr = &prev->next;
+
+   return curr;
+}
+
+void VG_(freeShadowChunk) ( ShadowChunk* sc )
+{
+   VG_(arena_free) ( VG_AR_CLIENT, (void*)sc->data );
+   VG_(arena_free) ( VG_AR_CORE,   sc );
+}
+
+
 /* Allocate a suitably-sized array, copy all the malloc-d block
    shadows into it, and return both the array and the size of it.
    This is used by the memory-leak detector.
@@ -78,8 +171,7 @@
    }
    if (*n_shadows == 0) return NULL;
 
-   arr = VG_(malloc)( VG_AR_PRIVATE, 
-                      *n_shadows * sizeof(ShadowChunk*) );
+   arr = VG_(malloc)( *n_shadows * sizeof(ShadowChunk*) );
 
    i = 0;
    for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) {
@@ -91,405 +183,284 @@
    return arr;
 }
 
-static void client_malloc_init ( void )
+Bool VG_(addr_is_in_block)( Addr a, Addr start, UInt size )
+{
+   return (start - VG_AR_CLIENT_REDZONE_SZB <= a
+           && a < start + size + VG_AR_CLIENT_REDZONE_SZB);
+}
+
+/* Return the first shadow chunk satisfying the predicate p. */
+ShadowChunk* VG_(any_matching_mallocd_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
 {
    UInt ml_no;
-   if (vg_client_malloc_init_done) return;
+   ShadowChunk* sc;
+
    for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++)
-      vg_malloclist[ml_no] = NULL;
-   vg_client_malloc_init_done = True;
-}
-
-
-static __attribute__ ((unused))
-       Int count_freelist ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n++;
-   return n;
-}
-
-static __attribute__ ((unused))
-       Int count_malloclists ( void )
-{
-   ShadowChunk* sc;
-   UInt ml_no;
-   Int  n = 0;
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
       for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
-         n++;
-   return n;
-}
+         if (p(sc))
+            return sc;
 
-static __attribute__ ((unused))
-       void freelist_sanity ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   /* VG_(printf)("freelist sanity\n"); */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n += sc->size;
-   vg_assert(n == vg_freed_list_volume);
-}
-
-/* Remove sc from malloc list # sc.  It is an unchecked error for
-   sc not to be present in the list. 
-*/
-static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc )
-{
-   ShadowChunk *sc1, *sc2;
-   if (sc == vg_malloclist[ml_no]) {
-      vg_malloclist[ml_no] = vg_malloclist[ml_no]->next;
-   } else {
-      sc1 = vg_malloclist[ml_no];
-      vg_assert(sc1 != NULL);
-      sc2 = sc1->next;
-      while (sc2 != sc) {
-         vg_assert(sc2 != NULL);
-         sc1 = sc2;
-         sc2 = sc2->next;
-      }
-      vg_assert(sc1->next == sc);
-      vg_assert(sc2 == sc);
-      sc1->next = sc2->next;
-   }
+   return NULL;
 }
 
 
-/* Put a shadow chunk on the freed blocks queue, possibly freeing up
-   some of the oldest blocks in the queue at the same time. */
-
-static void add_to_freed_queue ( ShadowChunk* sc )
-{
-   ShadowChunk* sc1;
-
-   /* Put it at the end of the freed list */
-   if (vg_freed_list_end == NULL) {
-      vg_assert(vg_freed_list_start == NULL);
-      vg_freed_list_end = vg_freed_list_start = sc;
-      vg_freed_list_volume = sc->size;
-   } else {
-      vg_assert(vg_freed_list_end->next == NULL);
-      vg_freed_list_end->next = sc;
-      vg_freed_list_end = sc;
-      vg_freed_list_volume += sc->size;
-   }
-   sc->next = NULL;
-
-   /* Release enough of the oldest blocks to bring the free queue
-      volume below vg_clo_freelist_vol. */
-
-   while (vg_freed_list_volume > VG_(clo_freelist_vol)) {
-      /* freelist_sanity(); */
-      vg_assert(vg_freed_list_start != NULL);
-      vg_assert(vg_freed_list_end != NULL);
-
-      sc1 = vg_freed_list_start;
-      vg_freed_list_volume -= sc1->size;
-      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
-      vg_assert(vg_freed_list_volume >= 0);
-
-      if (vg_freed_list_start == vg_freed_list_end) {
-         vg_freed_list_start = vg_freed_list_end = NULL;
-      } else {
-         vg_freed_list_start = sc1->next;
-      }
-      sc1->next = NULL; /* just paranoia */
-      VG_(free)(VG_AR_CLIENT,  (void*)(sc1->data));
-      VG_(free)(VG_AR_PRIVATE, sc1);
-   }
-}
-
-
-/* Allocate a user-chunk of size bytes.  Also allocate its shadow
-   block, make the shadow block point at the user block.  Put the
-   shadow chunk on the appropriate list, and set all memory
-   protections correctly. */
-
-static ShadowChunk* client_malloc_shadow ( ThreadState* tst,
-                                           UInt align, UInt size, 
-                                           VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         ml_no;
-
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
-#  endif
-
-   vg_assert(align >= 4);
-   if (align == 4)
-      p = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   else
-      p = (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size);
-
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = kind;
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_writable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
-   return sc;
-}
-
+/*------------------------------------------------------------*/
+/*--- client_malloc(), etc                                 ---*/
+/*------------------------------------------------------------*/
 
 /* Allocate memory, noticing whether or not we are doing the full
    instrumentation thing. */
-
-void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+static __inline__
+void* alloc_and_new_mem ( ThreadState* tst, UInt size, UInt alignment,
+                          Bool is_zeroed, VgAllocKind kind )
 {
-   ShadowChunk* sc;
+   Addr p;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               size, raw_alloc_kind );
-#  endif
 
    vg_cmalloc_n_mallocs ++;
    vg_cmalloc_bs_mallocd += size;
 
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc) ( VG_AR_CLIENT, size );
-   }
+   vg_assert(alignment >= 4);
+   if (alignment == 4)
+      p = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, size);
+   else
+      p = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, size);
 
-   sc = client_malloc_shadow ( tst, VG_(clo_alignment), size, kind );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   if (needs_shadow_chunks())
+      addShadowChunk ( tst, p, size, kind );
+
+   VG_TRACK( ban_mem_heap, p-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( new_mem_heap, p, size, is_zeroed );
+   VG_TRACK( ban_mem_heap, p+size, VG_AR_CLIENT_REDZONE_SZB );
+
+   VGP_POPCC(VgpCliMalloc);
+   return (void*)p;
+}
+
+void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+{
+   void* p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                                 /*is_zeroed*/False, kind );
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, kind, p );
+#  endif
+   return p;
 }
 
 
 void* VG_(client_memalign) ( ThreadState* tst, UInt align, UInt size )
 {
-   ShadowChunk* sc;
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   void* p = alloc_and_new_mem ( tst, size, align, 
+                                 /*is_zeroed*/False, Vg_AllocMalloc );
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d ) = %p\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               align, size, p );
 #  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size );
-   }
-   sc = client_malloc_shadow ( tst, align, size, Vg_AllocMalloc );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   return p;
 }
 
 
-void VG_(client_free) ( ThreadState* tst, void* ptrV, VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, raw_alloc_kind );
-#  endif
-
-   vg_cmalloc_n_frees ++;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      VG_(free) ( VG_AR_CLIENT, ptrV );
-      return;
-   }
-
-   /* first, see if ptrV is one vg_client_malloc gave out. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   vg_mlist_frees++;
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      vg_mlist_tries++;
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      VGP_POPCC;
-      return;
-   }
-
-   /* check if its a matching free() / delete / delete [] */
-   if (kind != sc->allockind)
-      VG_(record_freemismatch_error) ( tst, (Addr) ptrV );
-
-   /* Remove the shadow chunk from the mallocd list. */
-   remove_from_malloclist ( ml_no, sc );
-
-   /* Declare it inaccessible. */
-   VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                         sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-   VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-
-   /* Put it out of harm's way for a while. */
-   add_to_freed_queue ( sc );
-   VGP_POPCC;
-}
-
-
-
 void* VG_(client_calloc) ( ThreadState* tst, UInt nmemb, UInt size1 )
 {
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         size, i, ml_no;
+   void*        p;
+   UInt         size, i;
 
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   size = nmemb * size1;
 
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               nmemb, size1 );
-#  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += nmemb * size1;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 );
-   }
-
-   size      = nmemb * size1;
-   p         = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = Vg_AllocMalloc; /* its a lie - but true. eat this :) */
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_readable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
+   p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                              /*is_zeroed*/True, Vg_AllocMalloc );
+   /* Must zero block for calloc! */
    for (i = 0; i < size; i++) ((UChar*)p)[i] = 0;
 
-   VGP_POPCC;
-   return (void*)p;
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               nmemb, size1, p );
+#  endif
+
+   return p;
+}
+
+static
+void die_and_free_mem ( ThreadState* tst, ShadowChunk* sc,
+                        ShadowChunk** prev_chunks_next_ptr )
+{
+   /* Note: ban redzones again -- just in case user de-banned them
+      with a client request... */
+   VG_TRACK( ban_mem_heap, sc->data-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( die_mem_heap, sc->data, sc->size );
+   VG_TRACK( ban_mem_heap, sc->data+sc->size, VG_AR_CLIENT_REDZONE_SZB );
+
+   /* Remove sc from the malloclist using prev_chunks_next_ptr to
+      avoid repeating the hash table lookup.  Can't remove until at least
+      after free and free_mismatch errors are done because they use
+      describe_addr() which looks for it in malloclist. */
+   *prev_chunks_next_ptr = sc->next;
+
+   if (VG_(needs).alternative_free)
+      SK_(alt_free) ( sc, tst );
+   else
+      VG_(freeShadowChunk) ( sc );
 }
 
 
-void* VG_(client_realloc) ( ThreadState* tst, void* ptrV, UInt size_new )
+void VG_(client_free) ( ThreadState* tst, void* p, VgAllocKind kind )
 {
-   ShadowChunk *sc, *sc_new;
-   UInt         i, ml_no;
+   ShadowChunk*  sc;
+   ShadowChunk** prev_chunks_next_ptr;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
 
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, size_new );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               p, kind );
 #  endif
 
    vg_cmalloc_n_frees ++;
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size_new;
 
-   if (!VG_(clo_instrument)) {
-      vg_assert(ptrV != NULL && size_new != 0);
-      VGP_POPCC;
-      return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new );
-   }
+   if (! needs_shadow_chunks()) {
+      VG_(arena_free) ( VG_AR_CLIENT, p );
 
-   /* First try and find the block. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-  
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      /* Perhaps we should keep going regardless. */
-      VGP_POPCC;
-      return NULL;
-   }
-
-   if (sc->allockind != Vg_AllocMalloc) {
-      /* can not realloc a range that was allocated with new or new [] */
-      VG_(record_freemismatch_error) ( tst, (Addr)ptrV );
-      /* but keep going anyway */
-   }
-
-   if (sc->size == size_new) {
-      /* size unchanged */
-      VGP_POPCC;
-      return ptrV;
-   }
-   if (sc->size > size_new) {
-      /* new size is smaller */
-      VGM_(make_noaccess)( sc->data + size_new, 
-                           sc->size - size_new );
-      sc->size = size_new;
-      VGP_POPCC;
-      return ptrV;
    } else {
-      /* new size is bigger */
-      sc_new = client_malloc_shadow ( tst, VG_(clo_alignment), 
-                                      size_new, Vg_AllocMalloc );
-      for (i = 0; i < sc->size; i++)
-         ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i];
-      VGM_(copy_address_range_perms) ( 
-         sc->data, sc_new->data, sc->size );
-      remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc );
-      VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                            sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-      VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-      add_to_freed_queue ( sc );
-      VGP_POPCC;
-      return (void*)sc_new->data;
-   }  
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         VGP_POPCC(VgpCliMalloc);
+         return;
+      }
+
+      /* check if its a matching free() / delete / delete [] */
+      if (kind != sc->allockind)
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+
+      die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+   } 
+   VGP_POPCC(VgpCliMalloc);
 }
 
 
-void VG_(clientmalloc_done) ( void )
+void* VG_(client_realloc) ( ThreadState* tst, void* p, UInt new_size )
+{
+   ShadowChunk  *sc;
+   ShadowChunk **prev_chunks_next_ptr;
+   UInt          i;
+
+   VGP_PUSHCC(VgpCliMalloc);
+
+   vg_cmalloc_n_frees ++;
+   vg_cmalloc_n_mallocs ++;
+   vg_cmalloc_bs_mallocd += new_size;
+
+   if (! needs_shadow_chunks()) {
+      vg_assert(p != NULL && new_size != 0);
+      p = VG_(arena_realloc) ( VG_AR_CLIENT, p, VG_(clo_alignment), 
+                               new_size );
+      VGP_POPCC(VgpCliMalloc);
+      return p;
+
+   } else {
+      /* First try and find the block. */
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         /* Perhaps we should return to the program regardless. */
+         VGP_POPCC(VgpCliMalloc);
+         return NULL;
+      }
+     
+      /* check if its a matching free() / delete / delete [] */
+      if (Vg_AllocMalloc != sc->allockind) {
+         /* can not realloc a range that was allocated with new or new [] */
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+         /* but keep going anyway */
+      }
+
+      if (sc->size == new_size) {
+         /* size unchanged */
+         VGP_POPCC(VgpCliMalloc);
+         return p;
+         
+      } else if (sc->size > new_size) {
+         /* new size is smaller */
+         VG_TRACK( die_mem_heap, sc->data+new_size, sc->size-new_size );
+         sc->size = new_size;
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_smaller ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, p );
+#        endif
+         return p;
+
+      } else {
+         /* new size is bigger */
+         Addr p_new;
+         
+         /* Get new memory */
+         vg_assert(VG_(clo_alignment) >= 4);
+         if (VG_(clo_alignment) == 4)
+            p_new = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, new_size);
+         else
+            p_new = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, 
+                                            VG_(clo_alignment), new_size);
+
+         /* First half kept and copied, second half new, 
+            red zones as normal */
+         VG_TRACK( ban_mem_heap, p_new-VG_AR_CLIENT_REDZONE_SZB, 
+                                 VG_AR_CLIENT_REDZONE_SZB );
+         VG_TRACK( copy_mem_heap, (Addr)p, p_new, sc->size );
+         VG_TRACK( new_mem_heap, p_new+sc->size, new_size-sc->size, 
+                   /*inited=*/False );
+         VG_TRACK( ban_mem_heap, p_new+new_size, VG_AR_CLIENT_REDZONE_SZB );
+
+         /* Copy from old to new */
+         for (i = 0; i < sc->size; i++)
+            ((UChar*)p_new)[i] = ((UChar*)p)[i];
+
+         /* Free old memory */
+         die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+
+         /* this has to be after die_and_free_mem, otherwise the
+            former succeeds in shorting out the new block, not the
+            old, in the case when both are on the same list.  */
+         addShadowChunk ( tst, p_new, new_size, Vg_AllocMalloc );
+
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_bigger ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, (void*)p_new );
+#        endif
+         return (void*)p_new;
+      }  
+   }
+}
+
+void VG_(print_malloc_stats) ( void )
 {
    UInt         nblocks, nbytes, ml_no;
    ShadowChunk* sc;
 
-   client_malloc_init();
+   if (VG_(clo_verbosity) == 0)
+      return;
+
+   vg_assert(needs_shadow_chunks());
 
    nblocks = nbytes = 0;
 
@@ -500,9 +471,6 @@
       }
    }
 
-   if (VG_(clo_verbosity) == 0)
-     return;
-
    VG_(message)(Vg_UserMsg, 
                 "malloc/free: in use at exit: %d bytes in %d blocks.",
                 nbytes, nblocks);
@@ -510,9 +478,6 @@
                 "malloc/free: %d allocs, %d frees, %d bytes allocated.",
                 vg_cmalloc_n_mallocs,
                 vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd);
-   if (!VG_(clo_leak_check))
-      VG_(message)(Vg_UserMsg, 
-                   "For a detailed leak analysis,  rerun with: --leak-check=yes");
    if (0)
       VG_(message)(Vg_DebugMsg,
                    "free search: %d tries, %d frees", 
@@ -522,58 +487,6 @@
       VG_(message)(Vg_UserMsg, "");
 }
 
-
-/* Describe an address as best you can, for error messages,
-   putting the result in ai. */
-
-void VG_(describe_addr) ( Addr a, AddrInfo* ai )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-   Bool         ok;
-   ThreadId     tid;
-
-   /* Perhaps it's a user-def'd block ? */
-   ok = VG_(client_perm_maybe_describe)( a, ai );
-   if (ok)
-      return;
-   /* Perhaps it's on a thread's stack? */
-   tid = VG_(identify_stack_addr)(a);
-   if (tid != VG_INVALID_THREADID) {
-      ai->akind     = Stack;
-      ai->stack_tid = tid;
-      return;
-   }
-   /* Search for a freed block which might bracket it. */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) {
-      if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-          && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-         ai->akind      = Freed;
-         ai->blksize    = sc->size;
-         ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-         ai->lastchange = sc->where;
-         return;
-      }
-   }
-   /* Search for a mallocd block which might bracket it. */
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) {
-      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-         if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-             && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-            ai->akind      = Mallocd;
-            ai->blksize    = sc->size;
-            ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-            ai->lastchange = sc->where;
-            return;
-         }
-      }
-   }
-   /* Clueless ... */
-   ai->akind = Unknown;
-   return;
-}
-
-
 /*--------------------------------------------------------------------*/
 /*--- end                                        vg_clientmalloc.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index d3da14b..abf7240 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -26,30 +26,17 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_CONSTANTS_H
 #define __VG_CONSTANTS_H
 
+#include "vg_constants_skin.h"
 
 /* This file is included in all Valgrind source files, including
    assembly ones. */
 
-/* All symbols externally visible from valgrind.so are prefixed
-   as specified here.  The prefix can be changed, so as to avoid
-   namespace conflict problems.
-*/
-#define VGAPPEND(str1,str2) str1##str2
-
-/* These macros should add different prefixes so the same base
-   name can safely be used across different macros. */
-#define VG_(str)    VGAPPEND(vgPlain_,str)
-#define VGM_(str)   VGAPPEND(vgMem_,str)
-#define VGP_(str)   VGAPPEND(vgProf_,str)
-#define VGOFF_(str) VGAPPEND(vgOff_,str)
-
-
 /* Magic values that %ebp might be set to when returning to the
    dispatcher.  The only other legitimate value is to point to the
    start of VG_(baseBlock).  These also are return values from
@@ -59,13 +46,12 @@
    returns to the dispatch loop.  TRC means that this value is a valid
    thread return code, which the dispatch loop may return to the
    scheduler.  */
-#define VG_TRC_EBP_JMP_STKADJ     17 /* EBP only; handled by dispatcher */
 #define VG_TRC_EBP_JMP_SYSCALL    19 /* EBP and TRC */
 #define VG_TRC_EBP_JMP_CLIENTREQ  23 /* EBP and TRC */
 
-#define VG_TRC_INNER_COUNTERZERO  29  /* TRC only; means bb ctr == 0 */
-#define VG_TRC_INNER_FASTMISS     31  /* TRC only; means fast-cache miss. */
-#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* TRC only; got sigsegv/sigbus */
+#define VG_TRC_INNER_FASTMISS     31 /* TRC only; means fast-cache miss. */
+#define VG_TRC_INNER_COUNTERZERO  29 /* TRC only; means bb ctr == 0 */
+#define VG_TRC_UNRESUMABLE_SIGNAL 37 /* TRC only; got sigsegv/sigbus */
 
 
 /* Debugging hack for assembly code ... sigh. */
@@ -93,7 +79,7 @@
 /* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 
-#endif /* ndef __VG_INCLUDE_H */
+#endif /* ndef __VG_CONSTANTS_H */
 
 /*--------------------------------------------------------------------*/
 /*--- end                                           vg_constants.h ---*/
diff --git a/coregrind/vg_default.c b/coregrind/vg_default.c
new file mode 100644
index 0000000..a4b52ea
--- /dev/null
+++ b/coregrind/vg_default.c
@@ -0,0 +1,249 @@
+/*--------------------------------------------------------------------*/
+/*--- Default panicky definitions of template functions that skins ---*/
+/*--- should override.                                             ---*/
+/*---                                                 vg_default.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+/* These functions aren't intended to be run.  Replacement functions used by
+ * the chosen skin are substituted by compiling the skin into a .so and
+ * LD_PRELOADing it.  Nasty :) */
+
+#include "vg_include.h"
+
+/* ---------------------------------------------------------------------
+   Error messages (for malformed skins)
+   ------------------------------------------------------------------ */
+
+/* If the skin fails to define one or more of the required functions,
+ * make it very clear what went wrong! */
+
+static __attribute__ ((noreturn))
+void fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s',\n"
+      "  which is required.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+static __attribute__ ((noreturn))
+void non_fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s'\n"
+      "  required by one of its needs.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+/* ---------------------------------------------------------------------
+   Fundamental template functions
+   ------------------------------------------------------------------ */
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   fund_panic("SK_(pre_clo_init)");
+}
+
+void SK_(post_clo_init)(void)
+{
+   fund_panic("SK_(post_clo_init)");
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr not_used)
+{
+   fund_panic("SK_(instrument)");
+}
+
+void SK_(fini)(void)
+{
+   fund_panic("SK_(fini)");
+}
+
+/* ---------------------------------------------------------------------
+   For error reporting and suppression handling
+   ------------------------------------------------------------------ */
+
+Bool SK_(eq_SkinError)(VgRes res, SkinError* e1, SkinError* e2)
+{
+   non_fund_panic("SK_(eq_SkinError)");
+}
+
+void SK_(pp_SkinError)(SkinError* ec, void (*pp_ExeContext)(void))
+{
+   non_fund_panic("SK_(pp_SkinError)");
+}
+
+void SK_(dup_extra_and_update)(SkinError* ec)
+{
+   non_fund_panic("SK_(dup_extra_and_update)");
+}
+
+Bool SK_(recognised_suppression)(Char* name, SuppKind* skind)
+{
+   non_fund_panic("SK_(recognised_suppression)");
+}
+
+Bool SK_(read_extra_suppression_info)(Int fd, Char* buf, 
+                                       Int nBuf, SkinSupp *s)
+{
+   non_fund_panic("SK_(read_extra_suppression_info)");
+}
+
+Bool SK_(error_matches_suppression)(SkinError* ec, SkinSupp* su)
+{
+   non_fund_panic("SK_(error_matches_suppression)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(discard_basic_block_info)(Addr a, UInt size)
+{
+   non_fund_panic("SK_(discard_basic_block_info)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(written_shadow_regs_values)(UInt* gen_reg, UInt* eflags)
+{
+   non_fund_panic("SK_(written_shadow_regs_values)");
+}
+
+
+/* ---------------------------------------------------------------------
+   Command line arg template function
+   ------------------------------------------------------------------ */
+
+Bool SK_(process_cmd_line_option)(Char* argv)
+{
+   non_fund_panic("SK_(process_cmd_line_option)");
+}
+
+Char* SK_(usage)(void)
+{
+   non_fund_panic("SK_(usage)");
+}
+
+/* ---------------------------------------------------------------------
+   Client request template function
+   ------------------------------------------------------------------ */
+
+UInt SK_(handle_client_request)(ThreadState* tst, UInt* arg_block)
+{
+   non_fund_panic("SK_(handle_client_request)");
+}
+
+/* ---------------------------------------------------------------------
+   UCode extension
+   ------------------------------------------------------------------ */
+
+void SK_(emitExtUInstr)(UInstr* u, RRegSet regs_live_before)
+{
+   non_fund_panic("SK_(emitExtUInstr)");
+}
+
+Bool SK_(saneExtUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u)
+{
+   non_fund_panic("SK_(saneExtUInstr)");
+}
+
+Char* SK_(nameExtUOpcode)(Opcode opc)
+{
+   non_fund_panic("SK_(nameExtUOpcode)");
+}
+
+void SK_(ppExtUInstr)(UInstr* u)
+{
+   non_fund_panic("SK_(ppExtUInstr)");
+}
+
+Int SK_(getExtRegUsage)(UInstr* u, Tag tag, RegUse* arr)
+{
+   non_fund_panic("SK_(getExtRegUsage)");
+}
+
+/* ---------------------------------------------------------------------
+   Syscall wrapping
+   ------------------------------------------------------------------ */
+
+void* SK_(pre_syscall)(ThreadId tid, UInt syscallno, Bool is_blocking)
+{
+   non_fund_panic("SK_(pre_syscall)");
+}
+
+void  SK_(post_syscall)(ThreadId tid, UInt syscallno,
+                         void* pre_result, Int res, Bool is_blocking)
+{
+   non_fund_panic("SK_(post_syscall)");
+}
+
+/* ---------------------------------------------------------------------
+   Shadow chunks
+   ------------------------------------------------------------------ */
+
+void SK_(complete_shadow_chunk)( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(complete_shadow_chunk)");
+}
+
+/* ---------------------------------------------------------------------
+   Alternative free()
+   ------------------------------------------------------------------ */
+
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(alt_free)");
+}
+
+/* ---------------------------------------------------------------------
+   Sanity checks
+   ------------------------------------------------------------------ */
+
+Bool SK_(cheap_sanity_check)(void)
+{
+   non_fund_panic("SK_(cheap_sanity_check)");
+}
+
+Bool SK_(expensive_sanity_check)(void)
+{
+   non_fund_panic("SK_(expensive_sanity_check)");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                             vg_default.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_demangle.c b/coregrind/vg_demangle.c
index f07f7f3..6dff76f 100644
--- a/coregrind/vg_demangle.c
+++ b/coregrind/vg_demangle.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -49,12 +49,14 @@
    Int   n_result  = 0;
    Char* demangled = NULL;
 
+   VGP_PUSHCC(VgpDemangle);
+
    if (VG_(clo_demangle))
       demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
 
    if (demangled) {
       ADD_TO_RESULT(demangled, VG_(strlen)(demangled));
-      VG_(free) (VG_AR_DEMANGLE, demangled);
+      VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    } else {
       ADD_TO_RESULT(orig, VG_(strlen)(orig));
    }
@@ -65,6 +67,8 @@
    vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE));
 
    /* VG_(show_all_arena_stats)(); */
+
+   VGP_POPCC(VgpDemangle);
 }
 
 
diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S
index bd1c5b9..7cdb209 100644
--- a/coregrind/vg_dispatch.S
+++ b/coregrind/vg_dispatch.S
@@ -1,8 +1,8 @@
 
-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address.       ---##
-##---                                                vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                                vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
 
 /*
   This file is part of Valgrind, an x86 protected-mode emulator 
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -59,9 +59,9 @@
 	
 .globl VG_(run_innerloop)
 VG_(run_innerloop):
-	#OYNK(1000)
+	/* OYNK(1000) */
 
-	# ----- entry point to VG_(run_innerloop) -----
+	/* ----- entry point to VG_(run_innerloop) ----- */
 	pushl	%ebx
 	pushl	%ecx
 	pushl	%edx
@@ -69,74 +69,98 @@
 	pushl	%edi
 	pushl	%ebp
 
-	# Set up the baseBlock pointer
+	/* Set up the baseBlock pointer */
 	movl	$VG_(baseBlock), %ebp
 
-	# fetch m_eip into %eax
+	/* fetch m_eip into %eax */
 	movl	VGOFF_(m_eip), %esi
 	movl	(%ebp, %esi, 4), %eax
 	
-	# Start off dispatching paranoically, since we no longer have
-	# any indication whether or not this might be a special call/ret
-	# transfer.
-	jmp	dispatch_stkadj
-	
-	
 dispatch_main:
-	# Jump here to do a new dispatch.
-	# %eax holds destination (original) address.
-	# %ebp indicates further details of the control transfer
-	# requested to the address in %eax.
-	#
-	# If ebp == & VG_(baseBlock), just jump next to %eax.
-	# 
-	# If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
-	# continuing at eax.
-	#
-	# If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
-	# continuing at eax.
-	#
-	# If %ebp has any other value, we panic.
+	/* Jump here to do a new dispatch.
+	   %eax holds destination (original) address.
+	   %ebp indicates further details of the control transfer
+	   requested to the address in %eax.
 	
+	   If ebp == & VG_(baseBlock), just jump next to %eax.
+	 
+	   If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
+	   continuing at eax.
+	
+	   If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
+	   continuing at eax.
+	
+	   If %ebp has any other value, we panic.
+	*/
+	cmpl	$VG_(baseBlock), %ebp
+	jnz	dispatch_exceptional
+	/* fall into main loop */
+
+
+dispatch_boring:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
+	movl	VGOFF_(m_eip), %esi
+	movl	%eax, (%ebp, %esi, 4)
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+	decl	VG_(dispatch_ctr)
+	jz	counter_is_zero
+	/* try a fast lookup in the translation cache */
+	movl	%eax, %ebx
+	andl	$VG_TT_FAST_MASK, %ebx	
+	/* ebx = tt_fast index */
+	movl	VG_(tt_fast)(,%ebx,4), %ebx	
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
+	cmpl	%eax, (%ebx)
+	jnz	fast_lookup_failed
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
+	movl	VG_(current_epoch), %ecx
+	movl	%ecx, 8(%ebx)
+#endif
+	call	*4(%ebx)
 	cmpl	$VG_(baseBlock), %ebp
 	jnz	dispatch_exceptional
 
-dispatch_boring:
-	# save the jump address at VG_(baseBlock)[VGOFF_(m_eip)],
+dispatch_boring_unroll2:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
-	
-	# do a timeslice check.
-	# are we out of timeslice?  If yes, defer to scheduler.
-	#OYNK(1001)
+#if 1
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
 	decl	VG_(dispatch_ctr)
 	jz	counter_is_zero
-
-	#OYNK(1002)
-	# try a fast lookup in the translation cache
+#endif
+	/* try a fast lookup in the translation cache */
 	movl	%eax, %ebx
 	andl	$VG_TT_FAST_MASK, %ebx	
-	# ebx = tt_fast index
+	/* ebx = tt_fast index */
 	movl	VG_(tt_fast)(,%ebx,4), %ebx	
-	# ebx points at a tt entry
-	# now compare target with the tte.orig_addr field (+0)
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
 	cmpl	%eax, (%ebx)
 	jnz	fast_lookup_failed
-
-	# Found a match.  Set the tte.mru_epoch field (+8)
-	# and call the tte.trans_addr field (+4)
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
 	movl	VG_(current_epoch), %ecx
 	movl	%ecx, 8(%ebx)
+#endif
 	call	*4(%ebx)
-	jmp	dispatch_main
+	cmpl	$VG_(baseBlock), %ebp
+	jz	dispatch_boring
+
+	jmp	dispatch_exceptional
+
 	
 fast_lookup_failed:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_FASTMISS, %eax
 	jmp	run_innerloop_exit
 
 counter_is_zero:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_COUNTERZERO, %eax
 	jmp	run_innerloop_exit
 	
@@ -155,21 +179,19 @@
    make it look cleaner. 
 */
 dispatch_exceptional:
-	# this is jumped to only, not fallen-through from above
-	cmpl	$VG_TRC_EBP_JMP_STKADJ, %ebp
-	jz	dispatch_stkadj
+	/* this is jumped to only, not fallen-through from above */
 	cmpl	$VG_TRC_EBP_JMP_SYSCALL, %ebp
 	jz	dispatch_syscall
 	cmpl	$VG_TRC_EBP_JMP_CLIENTREQ, %ebp
 	jz	dispatch_clientreq
 
-	# ebp has an invalid value ... crap out.
+	/* ebp has an invalid value ... crap out. */
 	pushl	$panic_msg_ebp
 	call	VG_(panic)
-	#	(never returns)
+	/* (never returns) */
 
 dispatch_syscall:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
@@ -177,29 +199,13 @@
 	jmp	run_innerloop_exit
 	
 dispatch_clientreq:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
 	movl	$VG_TRC_EBP_JMP_CLIENTREQ, %eax
 	jmp	run_innerloop_exit
 
-dispatch_stkadj:
-	# save %eax in %EIP
-	movl	$VG_(baseBlock), %ebp
-	movl	VGOFF_(m_eip), %esi
-	movl	%eax, (%ebp, %esi, 4)
-
-	# see if we need to mess with stack blocks
-	pushl	%eax
-	call	VG_(delete_client_stack_blocks_following_ESP_change)
-	popl	%eax
-	movl	$VG_(baseBlock), %ebp
-		
-	# ok, its not interesting.  Handle the normal way.
-	jmp	dispatch_boring
-
-
 .data
 panic_msg_ebp:
 .ascii	"vg_dispatch: %ebp has invalid value!"
@@ -207,6 +213,6 @@
 .text	
 
 
-##--------------------------------------------------------------------##
-##--- end                                            vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_dummy_profile.c b/coregrind/vg_dummy_profile.c
new file mode 100644
index 0000000..2f869c9
--- /dev/null
+++ b/coregrind/vg_dummy_profile.c
@@ -0,0 +1,67 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Dummy profiling machinery -- overridden by skins when they   ---*/
+/*--- want profiling.                                              ---*/
+/*---                                           vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+
+void VGP_(register_profile_event) ( Int n, Char* name )
+{
+}
+
+void VGP_(init_profiling) ( void )
+{
+   VG_(printf)(
+      "\nProfiling error:\n"
+      "  The --profile=yes option was specified, but the skin\n"
+      "  wasn't built for profiling.  #include \"vg_profile.c\"\n"
+      "  into the skin and rebuild to allow profiling.\n\n");
+   VG_(exit)(1);
+}
+
+void VGP_(done_profiling) ( void )
+{
+   VG_(panic)("done_profiling");
+}
+
+void VGP_(pushcc) ( UInt cc )
+{
+   VG_(panic)("pushcc");
+}
+
+void VGP_(popcc) ( UInt cc )
+{
+   VG_(panic)("popcc");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                       vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_errcontext.c b/coregrind/vg_errcontext.c
index 46838b6..f38ade6 100644
--- a/coregrind/vg_errcontext.c
+++ b/coregrind/vg_errcontext.c
@@ -25,147 +25,22 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /*------------------------------------------------------------*/
-/*--- Defns                                                ---*/
+/*--- Globals                                              ---*/
 /*------------------------------------------------------------*/
 
-/* Suppression is a type describing an error which we want to
-   suppress, ie, not show the user, usually because it is caused by a
-   problem in a library which we can't fix, replace or work around.
-   Suppressions are read from a file at startup time, specified by
-   vg_clo_suppressions, and placed in the vg_suppressions list.  This
-   gives flexibility so that new suppressions can be added to the file
-   as and when needed. 
-*/
-typedef 
-   enum { 
-      /* Bad syscall params */
-      Param, 
-      /* Use of invalid values of given size */
-      Value0, Value1, Value2, Value4, Value8, 
-      /* Invalid read/write attempt at given size */
-      Addr1, Addr2, Addr4, Addr8,
-      /* Invalid or mismatching free */
-      FreeS,
-      /* Pthreading error */
-      PThread
-   } 
-   SuppressionKind;
-
-
-/* For each caller specified for a suppression, record the nature of
-   the caller name. */
-typedef
-   enum { 
-      /* Name is of an shared object file. */
-      ObjName,
-      /* Name is of a function. */
-      FunName 
-   }
-   SuppressionLocTy;
-
-
-/* A complete suppression record. */
-typedef
-   struct _Suppression {
-      struct _Suppression* next;
-      /* The number of times this error has been suppressed. */
-      Int count;
-      /* The name by which the suppression is referred to. */
-      Char* sname;
-      /* What kind of suppression. */
-      SuppressionKind skind;
-      /* Name of syscall param if skind==Param */
-      Char* param;
-      /* Name of fn where err occurs, and immediate caller (mandatory). */
-      SuppressionLocTy caller0_ty;
-      Char*            caller0;
-      SuppressionLocTy caller1_ty;
-      Char*            caller1;
-      /* Optional extra callers. */
-      SuppressionLocTy caller2_ty;
-      Char*            caller2;
-      SuppressionLocTy caller3_ty;
-      Char*            caller3;
-   } 
-   Suppression;
-
-
-/* ErrContext is a type for recording just enough info to generate an
-   error report for an illegal memory access.  The idea is that
-   (typically) the same few points in the program generate thousands
-   of illegal accesses, and we don't want to spew out a fresh error
-   message for each one.  Instead, we use these structures to common
-   up duplicates.  
-*/
-
-/* What kind of error it is. */
-typedef 
-   enum { ValueErr, AddrErr, 
-          ParamErr, UserErr, /* behaves like an anonymous ParamErr */
-          FreeErr, FreeMismatchErr,
-          PThreadErr /* pthread API error */
-   }
-   ErrKind;
-
-/* What kind of memory access is involved in the error? */
-typedef
-   enum { ReadAxs, WriteAxs, ExecAxs }
-   AxsKind;
-
-/* Top-level struct for recording errors. */
-typedef
-   struct _ErrContext {
-      /* ALL */
-      struct _ErrContext* next;
-      /* ALL */
-      /* NULL if unsuppressed; or ptr to suppression record. */
-      Suppression* supp;
-      /* ALL */
-      Int count;
-      /* ALL */
-      ErrKind ekind;
-      /* ALL */
-      ExeContext* where;
-      /* Addr */
-      AxsKind axskind;
-      /* Addr, Value */
-      Int size;
-      /* Addr, Free, Param, User */
-      Addr addr;
-      /* Addr, Free, Param, User */
-      AddrInfo addrinfo;
-      /* Param; hijacked for PThread as a description */
-      Char* syscall_param;
-      /* Param, User */
-      Bool isWriteableLack;
-      /* ALL */
-      ThreadId tid;
-      /* ALL */
-      /* These record %EIP, %ESP and %EBP at the error point.  They
-         are only used to make GDB-attaching convenient; there is no
-         other purpose; specifically they are not used to do
-         comparisons between errors. */
-      UInt m_eip;
-      UInt m_esp;
-      UInt m_ebp;
-   } 
-   ErrContext;
-
 /* The list of error contexts found, both suppressed and unsuppressed.
    Initially empty, and grows as errors are detected. */
-static ErrContext* vg_err_contexts = NULL;
+static CoreError* vg_errors = NULL;
 
 /* The list of suppression directives, as read from the specified
    suppressions file. */
-static Suppression* vg_suppressions = NULL;
+static CoreSupp* vg_suppressions = NULL;
 
 /* Running count of unsuppressed errors detected. */
 static UInt vg_n_errs_found = 0;
@@ -173,265 +48,76 @@
 /* Running count of suppressed errors detected. */
 static UInt vg_n_errs_suppressed = 0;
 
-/* Used to disable further error reporting once some huge number of
-   errors have already been logged. */
-static Bool vg_ignore_errors = False;
-
 /* forwards ... */
-static Suppression* is_suppressible_error ( ErrContext* ec );
+static CoreSupp* is_suppressible_error ( CoreError* err );
 
 
 /*------------------------------------------------------------*/
 /*--- Helper fns                                           ---*/
 /*------------------------------------------------------------*/
 
-
-static void clear_AddrInfo ( AddrInfo* ai )
-{
-   ai->akind      = Unknown;
-   ai->blksize    = 0;
-   ai->rwoffset   = 0;
-   ai->lastchange = NULL;
-   ai->stack_tid  = VG_INVALID_THREADID;
-   ai->maybe_gcc  = False;
-}
-
-static void clear_ErrContext ( ErrContext* ec )
-{
-   ec->next    = NULL;
-   ec->supp    = NULL;
-   ec->count   = 0;
-   ec->ekind   = ValueErr;
-   ec->where   = NULL;
-   ec->axskind = ReadAxs;
-   ec->size    = 0;
-   ec->addr    = 0;
-   clear_AddrInfo ( &ec->addrinfo );
-   ec->syscall_param   = NULL;
-   ec->isWriteableLack = False;
-   ec->m_eip   = 0xDEADB00F;
-   ec->m_esp   = 0xDEADBE0F;
-   ec->m_ebp   = 0xDEADB0EF;
-   ec->tid     = VG_INVALID_THREADID;
-}
-
-
-static __inline__
-Bool vg_eq_ExeContext ( Bool top_2_only,
-                        ExeContext* e1, ExeContext* e2 )
-{
-   /* Note that frames after the 4th are always ignored. */
-   if (top_2_only) {
-      return VG_(eq_ExeContext_top2(e1, e2));
-   } else {
-      return VG_(eq_ExeContext_top4(e1, e2));
-   }
-}
-
-
-static Bool eq_AddrInfo ( Bool cheap_addr_cmp,
-                          AddrInfo* ai1, AddrInfo* ai2 )
-{
-   if (ai1->akind != Undescribed 
-       && ai2->akind != Undescribed
-       && ai1->akind != ai2->akind) 
-      return False;
-   if (ai1->akind == Freed || ai1->akind == Mallocd) {
-      if (ai1->blksize != ai2->blksize)
-         return False;
-      if (!vg_eq_ExeContext(cheap_addr_cmp, 
-                            ai1->lastchange, ai2->lastchange))
-         return False;
-   }
-   return True;
-}
-
 /* Compare error contexts, to detect duplicates.  Note that if they
    are otherwise the same, the faulting addrs and associated rwoffsets
    are allowed to be different.  */
-
-static Bool eq_ErrContext ( Bool cheap_addr_cmp,
-                            ErrContext* e1, ErrContext* e2 )
+static Bool eq_CoreError ( VgRes res, CoreError* e1, CoreError* e2 )
 {
-   if (e1->ekind != e2->ekind) 
+   if (e1->skin_err.ekind != e2->skin_err.ekind) 
       return False;
-   if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where))
+   if (!VG_(eq_ExeContext)(res, e1->where, e2->where))
       return False;
 
-   switch (e1->ekind) {
+   switch (e1->skin_err.ekind) {
       case PThreadErr:
-         if (e1->syscall_param == e2->syscall_param) 
+         vg_assert(VG_(needs).core_errors);
+         if (e1->skin_err.string == e2->skin_err.string) 
             return True;
-         if (0 == VG_(strcmp)(e1->syscall_param, e2->syscall_param))
+         if (0 == VG_(strcmp)(e1->skin_err.string, e2->skin_err.string))
             return True;
          return False;
-      case UserErr:
-      case ParamErr:
-         if (e1->isWriteableLack != e2->isWriteableLack) return False;
-         if (e1->ekind == ParamErr 
-             && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param))
-            return False;
-         return True;
-      case FreeErr:
-      case FreeMismatchErr:
-         if (e1->addr != e2->addr) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case AddrErr:
-         if (e1->axskind != e2->axskind) return False;
-         if (e1->size != e2->size) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case ValueErr:
-         if (e1->size != e2->size) return False;
-         return True;
       default: 
-         VG_(panic)("eq_ErrContext");
+         if (VG_(needs).skin_errors)
+            return SK_(eq_SkinError)(res, &e1->skin_err, &e2->skin_err);
+         else {
+            VG_(printf)("\nUnhandled error type: %u. VG_(needs).skin_errors\n"
+                        "probably needs to be set.\n",
+                        e1->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-static void pp_AddrInfo ( Addr a, AddrInfo* ai )
+static void pp_CoreError ( CoreError* err, Bool printCount )
 {
-   switch (ai->akind) {
-      case Stack: 
-         VG_(message)(Vg_UserMsg, 
-                      "   Address 0x%x is on thread %d's stack", 
-                      a, ai->stack_tid);
-         break;
-      case Unknown:
-         if (ai->maybe_gcc) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
-               a);
-            VG_(message)(Vg_UserMsg, 
-               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
-         }
-         break;
-      case Freed: case Mallocd: case UserG: case UserS: {
-         UInt delta;
-         UChar* relative;
-         if (ai->rwoffset < 0) {
-            delta    = (UInt)(- ai->rwoffset);
-            relative = "before";
-         } else if (ai->rwoffset >= ai->blksize) {
-            delta    = ai->rwoffset - ai->blksize;
-            relative = "after";
-         } else {
-            delta    = ai->rwoffset;
-            relative = "inside";
-         }
-         if (ai->akind == UserS) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a %d-byte stack red-zone created",
-               a, delta, relative, 
-               ai->blksize );
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a block of size %d %s",
-               a, delta, relative, 
-               ai->blksize,
-               ai->akind==Mallocd ? "alloc'd" 
-                  : ai->akind==Freed ? "free'd" 
-                                     : "client-defined");
-         }
-         VG_(pp_ExeContext)(ai->lastchange);
-         break;
-      }
-      default:
-         VG_(panic)("pp_AddrInfo");
+   /* Closure for printing where the error occurred.  Abstracts details
+      about the `where' field away from the skin. */
+   void pp_ExeContextClosure(void)
+   {
+      VG_(pp_ExeContext) ( err->where );
    }
-}
-
-static void pp_ErrContext ( ErrContext* ec, Bool printCount )
-{
+   
    if (printCount)
-      VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count );
-   if (ec->tid > 1)
-      VG_(message)(Vg_UserMsg, "Thread %d:", ec->tid );
-   switch (ec->ekind) {
-      case ValueErr:
-         if (ec->size == 0) {
-             VG_(message)(
-                Vg_UserMsg,
-                "Conditional jump or move depends on uninitialised value(s)");
-         } else {
-             VG_(message)(Vg_UserMsg,
-                          "Use of uninitialised value of size %d",
-                          ec->size);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         break;
-      case AddrErr:
-         switch (ec->axskind) {
-            case ReadAxs:
-               VG_(message)(Vg_UserMsg, "Invalid read of size %d", 
-                                        ec->size ); 
-               break;
-            case WriteAxs:
-               VG_(message)(Vg_UserMsg, "Invalid write of size %d", 
-                                        ec->size ); 
-               break;
-            case ExecAxs:
-               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
-                                        "stated on the next line");
-               break;
-            default: 
-               VG_(panic)("pp_ErrContext(axskind)");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case FreeErr:
-         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
-         /* fall through */
-      case FreeMismatchErr:
-         if (ec->ekind == FreeMismatchErr)
-            VG_(message)(Vg_UserMsg, 
-                         "Mismatched free() / delete / delete []");
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case ParamErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Syscall param %s contains unaddressable byte(s)",
-                ec->syscall_param );
-         } else {
-            VG_(message)(Vg_UserMsg, 
-                "Syscall param %s contains uninitialised or "
-                "unaddressable byte(s)",
-            ec->syscall_param);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case UserErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Unaddressable byte(s) found during client check request");
-         } else {
-            VG_(message)(Vg_UserMsg, 
-               "Uninitialised or "
-               "unaddressable byte(s) found during client check request");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
+      VG_(message)(Vg_UserMsg, "Observed %d times:", err->count );
+   if (err->tid > 1)
+      VG_(message)(Vg_UserMsg, "Thread %d:", err->tid );
+
+   switch (err->skin_err.ekind) {
       case PThreadErr:
-         VG_(message)(Vg_UserMsg, "%s", ec->syscall_param );
-         VG_(pp_ExeContext)(ec->where);
+         vg_assert(VG_(needs).core_errors);
+         VG_(message)(Vg_UserMsg, "%s", err->skin_err.string );
+         VG_(pp_ExeContext)(err->where);
          break;
       default: 
-         VG_(panic)("pp_ErrContext");
+         if (VG_(needs).skin_errors)
+            SK_(pp_SkinError)( &err->skin_err, &pp_ExeContextClosure );
+         else {
+            VG_(printf)("\nUnhandled error type: %u.  VG_(needs).skin_errors\n"
+                        "probably needs to be set?\n",
+                        err->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-
 /* Figure out if we want to attach for GDB for this error, possibly
    by asking the user. */
 static
@@ -476,21 +162,69 @@
 }
 
 
-/* Top-level entry point to the error management subsystem.  All
-   detected errors are notified here; this routine decides if/when the
-   user should see the error. */
-static void VG_(maybe_add_context) ( ErrContext* ec )
+/* I've gone all object-oriented... initialisation depends on where the
+   error comes from:
+
+   - If from generated code (tst == NULL), the %EIP/%EBP values that we
+     need in order to create proper error messages are picked up out of
+     VG_(baseBlock) rather than from the thread table (vg_threads in
+     vg_scheduler.c).
+
+   - If not from generated code but in response to requests passed back to
+     the scheduler (tst != NULL), we pick up %EIP/%EBP values from the
+     stored thread state, not from VG_(baseBlock).  
+*/
+static __inline__
+void construct_error ( CoreError* err, ThreadState* tst, 
+                       ErrorKind ekind, Addr a, Char* s, void* extra )
 {
-   ErrContext* p;
-   ErrContext* p_prev;
-   Bool        cheap_addr_cmp         = False;
+   /* CoreError parts */
+   err->next     = NULL;
+   err->supp     = NULL;
+   err->count    = 1;
+   if (NULL == tst) {
+      err->tid   = VG_(get_current_tid)();
+      err->where = 
+         VG_(get_ExeContext2)( VG_(baseBlock)[VGOFF_(m_eip)], 
+                               VG_(baseBlock)[VGOFF_(m_ebp)],
+                               VG_(baseBlock)[VGOFF_(m_esp)],
+                               VG_(threads)[err->tid].stack_highest_word);
+      err->m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
+      err->m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+      err->m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
+   } else {
+      err->where = VG_(get_ExeContext) ( tst );
+      err->tid   = tst->tid;
+      err->m_eip = tst->m_eip;
+      err->m_esp = tst->m_esp;
+      err->m_ebp = tst->m_ebp;
+   }
+
+   /* SkinError parts */
+   err->skin_err.ekind  = ekind;
+   err->skin_err.addr   = a;
+   err->skin_err.string = s;
+   err->skin_err.extra  = extra;
+
+   /* sanity... */
+   vg_assert(err->tid >= 0 && err->tid < VG_N_THREADS);
+}
+
+/* Top-level entry point to the error management subsystem.
+   All detected errors are notified here; this routine decides if/when the
+   user should see the error. */
+void VG_(maybe_record_error) ( ThreadState* tst, 
+                               ErrorKind ekind, Addr a, Char* s, void* extra )
+{
+   CoreError   err;
+   CoreError*  p;
+   CoreError*  p_prev;
+   VgRes       exe_res                = Vg_MedRes;
    static Bool is_first_shown_context = True;
    static Bool stopping_message       = False;
    static Bool slowdown_message       = False;
    static Int  vg_n_errs_shown        = 0;
 
-   vg_assert(ec->tid >= 0 && ec->tid < VG_N_THREADS);
-
    /* After M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN different errors have
       been found, or M_VG_COLLECT_NO_ERRORS_AFTER_FOUND total errors
       have been found, just refuse to collect any more.  This stops
@@ -520,12 +254,11 @@
          VG_(message)(Vg_UserMsg, 
             "Rerun with --error-limit=no to disable this cutoff.  Note");
          VG_(message)(Vg_UserMsg, 
-            "that your program may now segfault without prior warning from");
+            "that errors may occur in your program without prior warning from");
          VG_(message)(Vg_UserMsg, 
             "Valgrind, because errors are no longer being displayed.");
          VG_(message)(Vg_UserMsg, "");
          stopping_message = True;
-         vg_ignore_errors = True;
       }
       return;
    }
@@ -534,7 +267,7 @@
       been found, be much more conservative about collecting new
       ones. */
    if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) {
-      cheap_addr_cmp = True;
+      exe_res = Vg_LowRes;
       if (!slowdown_message) {
          VG_(message)(Vg_UserMsg, "");
          VG_(message)(Vg_UserMsg, 
@@ -546,12 +279,14 @@
       }
    }
 
+   /* Build ourselves the error */
+   construct_error ( &err, tst, ekind, a, s, extra );
 
    /* First, see if we've got an error record matching this one. */
-   p      = vg_err_contexts;
+   p      = vg_errors;
    p_prev = NULL;
    while (p != NULL) {
-      if (eq_ErrContext(cheap_addr_cmp, p, ec)) {
+      if (eq_CoreError(exe_res, p, &err)) {
          /* Found it. */
          p->count++;
 	 if (p->supp != NULL) {
@@ -567,8 +302,8 @@
          if (p_prev != NULL) {
             vg_assert(p_prev->next == p);
             p_prev->next    = p->next;
-            p->next         = vg_err_contexts;
-            vg_err_contexts = p;
+            p->next         = vg_errors;
+            vg_errors = p;
 	 }
          return;
       }
@@ -578,27 +313,37 @@
 
    /* Didn't see it.  Copy and add. */
 
-   /* OK, we're really going to collect it.  First, describe any addr
-      info in the error. */
-   if (ec->addrinfo.akind == Undescribed)
-      VG_(describe_addr) ( ec->addr, &ec->addrinfo );
+   /* OK, we're really going to collect it.  First make a copy,
+      because the error context is on the stack and will disappear shortly.
+      We can duplicate the main part ourselves, but use
+      SK_(dup_extra_and_update) to duplicate the 'extra' part (unless it's
+      NULL).
+     
+      SK_(dup_extra_and_update) can also update the SkinError.  This is
+      for when there are more details to fill in which take time to work out
+      but don't affect our earlier decision to include the error -- by
+      postponing those details until now, we avoid the extra work in the
+      case where we ignore the error.
+    */
+   p = VG_(arena_malloc)(VG_AR_ERRORS, sizeof(CoreError));
+   *p = err;
+   if (NULL != err.skin_err.extra)
+      SK_(dup_extra_and_update)(&p->skin_err);
 
-   p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext));
-   *p = *ec;
-   p->next = vg_err_contexts;
-   p->supp = is_suppressible_error(ec);
-   vg_err_contexts = p;
+   p->next = vg_errors;
+   p->supp = is_suppressible_error(&err);
+   vg_errors = p;
    if (p->supp == NULL) {
       vg_n_errs_found++;
       if (!is_first_shown_context)
          VG_(message)(Vg_UserMsg, "");
-      pp_ErrContext(p, False);      
+      pp_CoreError(p, False);      
       is_first_shown_context = False;
       vg_n_errs_shown++;
       /* Perhaps we want a GDB attach at this point? */
       if (vg_is_GDB_attach_requested()) {
          VG_(swizzle_esp_then_start_GDB)(
-            ec->m_eip, ec->m_esp, ec->m_ebp);
+            err.m_eip, err.m_esp, err.m_ebp);
       }
    } else {
       vg_n_errs_suppressed++;
@@ -607,202 +352,34 @@
 }
 
 
-
-
 /*------------------------------------------------------------*/
 /*--- Exported fns                                         ---*/
 /*------------------------------------------------------------*/
 
-/* These two are called from generated code, so that the %EIP/%EBP
-   values that we need in order to create proper error messages are
-   picked up out of VG_(baseBlock) rather than from the thread table
-   (vg_threads in vg_scheduler.c). */
+/* These are called not from generated code but from the scheduler */
 
-void VG_(record_value_error) ( Int size )
+void VG_(record_pthread_error) ( ThreadId tid, Char* msg )
 {
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count = 1;
-   ec.next  = NULL;
-   ec.where = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                          VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind = ValueErr;
-   ec.size  = size;
-   ec.tid   = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   VG_(maybe_add_context) ( &ec );
+   if (! VG_(needs).core_errors) return;
+   VG_(maybe_record_error)( &VG_(threads)[tid], PThreadErr, /*addr*/0, msg, 
+                            /*extra*/NULL );
 }
 
-void VG_(record_address_error) ( Addr a, Int size, Bool isWrite )
-{
-   ErrContext ec;
-   Bool       just_below_esp;
-   if (vg_ignore_errors) return;
-
-   just_below_esp 
-      = VG_(is_just_below_ESP)( VG_(baseBlock)[VGOFF_(m_esp)], a );
-
-   /* If this is caused by an access immediately below %ESP, and the
-      user asks nicely, we just ignore it. */
-   if (VG_(clo_workaround_gcc296_bugs) && just_below_esp)
-      return;
-
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                            VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind   = AddrErr;
-   ec.axskind = isWrite ? WriteAxs : ReadAxs;
-   ec.size    = size;
-   ec.addr    = a;
-   ec.tid     = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   ec.addrinfo.akind     = Undescribed;
-   ec.addrinfo.maybe_gcc = just_below_esp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
-/* These five are called not from generated code but in response to
-   requests passed back to the scheduler.  So we pick up %EIP/%EBP
-   values from the stored thread state, not from VG_(baseBlock).  */
-
-void VG_(record_free_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_freemismatch_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeMismatchErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_jump_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = AddrErr;
-   ec.axskind = ExecAxs;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_param_err) ( ThreadState* tst, Addr a, Bool isWriteLack, 
-                             Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = ParamErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.syscall_param = msg;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_user_err) ( ThreadState* tst, Addr a, Bool isWriteLack )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = UserErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_pthread_err) ( ThreadId tid, Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   if (!VG_(clo_instrument)) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(threads)[tid].m_eip, 
-                                            VG_(threads)[tid].m_ebp );
-   ec.ekind   = PThreadErr;
-   ec.tid     = tid;
-   ec.syscall_param = msg;
-   ec.m_eip   = VG_(threads)[tid].m_eip;
-   ec.m_esp   = VG_(threads)[tid].m_esp;
-   ec.m_ebp   = VG_(threads)[tid].m_ebp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
 /*------------------------------*/
 
 void VG_(show_all_errors) ( void )
 {
-   Int         i, n_min;
-   Int         n_err_contexts, n_supp_contexts;
-   ErrContext  *p, *p_min;
-   Suppression *su;
-   Bool        any_supp;
+   Int        i, n_min;
+   Int        n_err_contexts, n_supp_contexts;
+   CoreError *p, *p_min;
+   CoreSupp   *su;
+   Bool       any_supp;
 
    if (VG_(clo_verbosity) == 0)
       return;
 
    n_err_contexts = 0;
-   for (p = vg_err_contexts; p != NULL; p = p->next) {
+   for (p = vg_errors; p != NULL; p = p->next) {
       if (p->supp == NULL)
          n_err_contexts++;
    }
@@ -826,20 +403,20 @@
    for (i = 0; i < n_err_contexts; i++) {
       n_min = (1 << 30) - 1;
       p_min = NULL;
-      for (p = vg_err_contexts; p != NULL; p = p->next) {
+      for (p = vg_errors; p != NULL; p = p->next) {
          if (p->supp != NULL) continue;
          if (p->count < n_min) {
             n_min = p->count;
             p_min = p;
          }
       }
-      if (p_min == NULL) VG_(panic)("pp_AllErrContexts");
+      if (p_min == NULL) VG_(panic)("show_all_errors()");
 
       VG_(message)(Vg_UserMsg, "");
       VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:",
                    p_min->count,
                    i+1, n_err_contexts);
-      pp_ErrContext( p_min, False );
+      pp_CoreError( p_min, False );
 
       if ((i+1 == VG_(clo_dump_error))) {
 	VG_(translate) ( 0 /* dummy ThreadId; irrelevant due to below NULLs */,
@@ -855,8 +432,7 @@
    for (su = vg_suppressions; su != NULL; su = su->next) {
       if (su->count > 0) {
          any_supp = True;
-         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, 
-                                   su->sname);
+         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, su->sname);
       }
    }
 
@@ -883,7 +459,7 @@
 
 #define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t'))
 
-static Bool getLine ( Int fd, Char* buf, Int nBuf )
+Bool VG_(getLine) ( Int fd, Char* buf, Int nBuf )
 {
    Char ch;
    Int  n, i;
@@ -924,7 +500,7 @@
    (fun: or obj:) part.
    Returns False if failed.
 */
-static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty )
+static Bool setLocationTy ( Char** p_caller, SuppLocTy* p_ty )
 {
    if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) {
       (*p_caller) += 4;
@@ -948,107 +524,95 @@
 #define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                       && VG_(strcmp)((s1),(s2))==0)
 
-static Char* copyStr ( Char* str )
-{
-   Int   n, i;
-   Char* str2;
-   n    = VG_(strlen)(str);
-   str2 = VG_(malloc)(VG_AR_PRIVATE, n+1);
-   vg_assert(n > 0);
-   for (i = 0; i < n+1; i++) str2[i] = str[i];
-   return str2;
-}
-
 static void load_one_suppressions_file ( Char* filename )
 {
 #  define N_BUF 200
-   Int  fd;
+   Int  fd, i;
    Bool eof;
+   Bool is_unrecognised_suppressions = False;
    Char buf[N_BUF+1];
-   fd = VG_(open_read)( filename );
+   fd = VG_(open)( filename, VKI_O_RDONLY, 0 );
    if (fd == -1) {
-      VG_(message)(Vg_UserMsg, 
-                   "FATAL: can't open suppressions file `%s'", 
+      VG_(message)(Vg_UserMsg, "FATAL: can't open suppressions file `%s'", 
                    filename );
       VG_(exit)(1);
    }
 
    while (True) {
-      Suppression* supp;
-      supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression));
+      /* Assign and initialise the two suppression halves (core and skin) */
+      CoreSupp* supp;
+      supp            = VG_(arena_malloc)(VG_AR_CORE, sizeof(CoreSupp));
       supp->count = 0;
-      supp->param = supp->caller0 = supp->caller1 
-                  = supp->caller2 = supp->caller3 = NULL;
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) supp->caller[i] = NULL;
+      supp->skin_supp.string = supp->skin_supp.extra = NULL;
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof) break;
 
       if (!STREQ(buf, "{")) goto syntax_error;
       
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof || STREQ(buf, "}")) goto syntax_error;
-      supp->sname = copyStr(buf);
+      supp->sname = VG_(arena_strdup)(VG_AR_CORE, buf);
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
+
       if (eof) goto syntax_error;
-      else if (STREQ(buf, "Param"))  supp->skind = Param;
-      else if (STREQ(buf, "Value0")) supp->skind = Value0; /* backwards compat */
-      else if (STREQ(buf, "Cond"))   supp->skind = Value0;
-      else if (STREQ(buf, "Value1")) supp->skind = Value1;
-      else if (STREQ(buf, "Value2")) supp->skind = Value2;
-      else if (STREQ(buf, "Value4")) supp->skind = Value4;
-      else if (STREQ(buf, "Value8")) supp->skind = Value8;
-      else if (STREQ(buf, "Addr1"))  supp->skind = Addr1;
-      else if (STREQ(buf, "Addr2"))  supp->skind = Addr2;
-      else if (STREQ(buf, "Addr4"))  supp->skind = Addr4;
-      else if (STREQ(buf, "Addr8"))  supp->skind = Addr8;
-      else if (STREQ(buf, "Free"))   supp->skind = FreeS;
-      else if (STREQ(buf, "PThread")) supp->skind = PThread;
-      else goto syntax_error;
 
-      if (supp->skind == Param) {
-         eof = getLine ( fd, buf, N_BUF );
-         if (eof) goto syntax_error;
-         supp->param = copyStr(buf);
+      /* Is it a core suppression? */
+      else if (VG_(needs).core_errors && STREQ(buf, "PThread")) 
+         supp->skin_supp.skind = PThreadSupp;
+
+      /* Is it a skin suppression? */
+      else if (VG_(needs).skin_errors && 
+               SK_(recognised_suppression)(buf, &(supp->skin_supp.skind))) {
+         /* do nothing, function fills in supp->skin_supp.skind */
+      }
+      //else goto syntax_error;
+      else {
+         /* SSS: if we don't recognise the suppression name, ignore entire
+          * entry.  Not sure if this is a good long-term approach -- makes
+          * it impossible to spot incorrect suppression names?  (apart
+          * from the warning given) */
+         if (! is_unrecognised_suppressions) {
+            is_unrecognised_suppressions = True;
+            VG_(start_msg)(Vg_DebugMsg);
+            VG_(add_to_msg)("Ignoring unrecognised suppressions: ");
+            VG_(add_to_msg)("'%s'", buf);
+         } else {
+            VG_(add_to_msg)(", '%s'", buf);
+         }
+         while (True) {
+            eof = VG_(getLine) ( fd, buf, N_BUF );
+            if (eof) goto syntax_error;
+            if (STREQ(buf, "}"))
+               break;
+         }
+         continue;
       }
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      supp->caller0 = copyStr(buf);
-      if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty)))
+      if (VG_(needs).skin_errors && 
+          !SK_(read_extra_suppression_info)(fd, buf, N_BUF, &supp->skin_supp)) 
          goto syntax_error;
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      if (!STREQ(buf, "}")) {
-         supp->caller1 = copyStr(buf);
-         if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty)))
-            goto syntax_error;
-      
-         eof = getLine ( fd, buf, N_BUF );
+      /* "i > 0" ensures at least one caller read. */
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) {
+         eof = VG_(getLine) ( fd, buf, N_BUF );
          if (eof) goto syntax_error;
-         if (!STREQ(buf, "}")) {
-            supp->caller2 = copyStr(buf);
-            if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty)))
-               goto syntax_error;
-
-            eof = getLine ( fd, buf, N_BUF );
-            if (eof) goto syntax_error;
-            if (!STREQ(buf, "}")) {
-               supp->caller3 = copyStr(buf);
-              if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty)))
-                 goto syntax_error;
-
-               eof = getLine ( fd, buf, N_BUF );
-               if (eof || !STREQ(buf, "}")) goto syntax_error;
-	    }
-         }
+         if (i > 0 && STREQ(buf, "}")) 
+            break;
+         supp->caller[i] = VG_(arena_strdup)(VG_AR_CORE, buf);
+         if (!setLocationTy(&(supp->caller[i]), &(supp->caller_ty[i])))
+            goto syntax_error;
       }
 
       supp->next = vg_suppressions;
       vg_suppressions = supp;
    }
-
+   if (is_unrecognised_suppressions) {
+      /* Print out warning about any ignored suppressions */
+      //VG_(end_msg)();
+   }
    VG_(close)(fd);
    return;
 
@@ -1083,148 +647,102 @@
    }
 }
 
+/* Return the name of an erring fn in a way which is useful
+   for comparing against the contents of a suppressions file. 
+   Doesn't demangle the fn name, because we want to refer to 
+   mangled names in the suppressions file.
+*/    
+static
+void get_objname_fnname ( Addr a,
+                          Char* obj_buf, Int n_obj_buf,
+                          Char* fun_buf, Int n_fun_buf )
+{     
+   (void)VG_(get_objname)          ( a, obj_buf, n_obj_buf );
+   (void)VG_(get_fnname_nodemangle)( a, fun_buf, n_fun_buf );
+}     
+
+static __inline__
+Bool supp_matches_error(CoreSupp* su, CoreError* err)
+{
+   switch (su->skin_supp.skind) {
+      case PThreadSupp:
+         return (err->skin_err.ekind == PThreadErr);
+      default:
+         if (VG_(needs).skin_errors) {
+            return (SK_(error_matches_suppression)(&err->skin_err, 
+                                                    &su->skin_supp));
+         } else {
+            VG_(printf)(
+               "\nUnhandled suppression type: %u.  VG_(needs).skin_errors\n"
+               "probably needs to be set.\n",
+               err->skin_err.ekind);
+            VG_(skin_error)("unhandled suppression type");
+         }
+   }
+}
+
+static __inline__
+Bool supp_matches_callers(CoreSupp* su, Char caller_obj[][M_VG_ERRTXT], 
+                                        Char caller_fun[][M_VG_ERRTXT])
+{
+   Int i;
+
+   for (i = 0; su->caller[i] != NULL; i++) {
+      switch (su->caller_ty[i]) {
+         case ObjName: if (VG_(stringMatch)(su->caller[i],
+                                            caller_obj[i])) break;
+                       return False;
+         case FunName: if (VG_(stringMatch)(su->caller[i], 
+                                            caller_fun[i])) break;
+                       return False;
+         default: VG_(panic)("is_suppressible_error");
+      }
+   }
+
+   /* If we reach here, it's a match */
+   return True;
+}
 
 /* Does an error context match a suppression?  ie is this a
-   suppressible error?  If so, return a pointer to the Suppression
+   suppressible error?  If so, return a pointer to the CoreSupp
    record, otherwise NULL.
-   Tries to minimise the number of calls to what_fn_is_this since they
-   are expensive.  
+   Tries to minimise the number of symbol searches since they are expensive.  
 */
-static Suppression* is_suppressible_error ( ErrContext* ec )
+static CoreSupp* is_suppressible_error ( CoreError* err )
 {
 #  define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                         && VG_(strcmp)((s1),(s2))==0)
+   Int i;
 
-   Char caller0_obj[M_VG_ERRTXT];
-   Char caller0_fun[M_VG_ERRTXT];
-   Char caller1_obj[M_VG_ERRTXT];
-   Char caller1_fun[M_VG_ERRTXT];
-   Char caller2_obj[M_VG_ERRTXT];
-   Char caller2_fun[M_VG_ERRTXT];
-   Char caller3_obj[M_VG_ERRTXT];
-   Char caller3_fun[M_VG_ERRTXT];
+   Char caller_obj[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
+   Char caller_fun[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
 
-   Suppression* su;
-   Int          su_size;
+   CoreSupp* su;
 
-   /* vg_what_fn_or_object_is_this returns:
-         <function_name>      or
-         <object_name>        or
-         ???
-      so the strings in the suppression file should match these.
+   /* get_objname_fnname() writes the function name and object name if
+      it finds them in the debug info.  so the strings in the suppression
+      file should match these.
    */
 
    /* Initialise these strs so they are always safe to compare, even
-      if what_fn_or_object_is_this doesn't write anything to them. */
-   caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0;
-   caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0;
+      if get_objname_fnname doesn't write anything to them. */
+   for (i = 0; i < VG_N_SUPP_CALLERS; i++)
+      caller_obj[i][0] = caller_fun[i][0] = 0;
 
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT,
-                            caller0_fun, M_VG_ERRTXT );
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT,
-                            caller1_fun, M_VG_ERRTXT );
-
-   if (VG_(clo_backtrace_size) > 2) {
-      VG_(what_obj_and_fun_is_this)
-         ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT,
-                               caller2_fun, M_VG_ERRTXT );
-
-      if (VG_(clo_backtrace_size) > 3) {
-         VG_(what_obj_and_fun_is_this)
-            ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT,
-                                  caller3_fun, M_VG_ERRTXT );
-      }
+   for (i = 0; i < VG_N_SUPP_CALLERS && i < VG_(clo_backtrace_size); i++) {
+      get_objname_fnname ( err->where->eips[i], 
+                           caller_obj[i], M_VG_ERRTXT,
+                           caller_fun[i], M_VG_ERRTXT );
    }
 
    /* See if the error context matches any suppression. */
    for (su = vg_suppressions; su != NULL; su = su->next) {
-      switch (su->skind) {
-         case FreeS:  case PThread:
-         case Param:  case Value0: su_size = 0; break;
-         case Value1: case Addr1:  su_size = 1; break;
-         case Value2: case Addr2:  su_size = 2; break;
-         case Value4: case Addr4:  su_size = 4; break;
-         case Value8: case Addr8:  su_size = 8; break;
-         default: VG_(panic)("errcontext_matches_suppression");
+      if (supp_matches_error(su, err) &&
+          supp_matches_callers(su, caller_obj, caller_fun)) {
+         return su;
       }
-      switch (su->skind) {
-         case Param:
-            if (ec->ekind != ParamErr) continue;
-            if (!STREQ(su->param, ec->syscall_param)) continue;
-            break;
-         case Value0: case Value1: case Value2: case Value4: case Value8:
-            if (ec->ekind != ValueErr) continue;
-            if (ec->size  != su_size)  continue;
-            break;
-         case Addr1: case Addr2: case Addr4: case Addr8:
-            if (ec->ekind != AddrErr) continue;
-            if (ec->size  != su_size) continue;
-            break;
-         case FreeS:
-            if (ec->ekind != FreeErr 
-                && ec->ekind != FreeMismatchErr) continue;
-            break;
-         case PThread:
-            if (ec->ekind != PThreadErr) continue;
-            break;
-      }
-
-      switch (su->caller0_ty) {
-         case ObjName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_obj)) continue;
-                       break;
-         case FunName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_fun)) continue;
-                       break;
-         default: goto baaaad;
-      }
-
-      if (su->caller1 != NULL) {
-         vg_assert(VG_(clo_backtrace_size) >= 2);
-         switch (su->caller1_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) {
-         switch (su->caller2_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) {
-         switch (su->caller3_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller3,
-                                                caller3_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller3, 
-                                                caller3_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      return su;
    }
-
-   return NULL;
-
-  baaaad:
-   VG_(panic)("is_suppressible_error");
+   return NULL;      /* no matches */
 
 #  undef STREQ
 }
diff --git a/coregrind/vg_execontext.c b/coregrind/vg_execontext.c
index 4da1b31..fe85fa0 100644
--- a/coregrind/vg_execontext.c
+++ b/coregrind/vg_execontext.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 
 /*------------------------------------------------------------*/
@@ -109,39 +108,40 @@
 
 
 /* Compare two ExeContexts, comparing all callers. */
-Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 )
+Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
 {
-   vg_ec_cmpAlls++;
-   /* Just do pointer comparison. */
-   if (e1 != e2) return False;
-   return True;
-}
+   if (e1 == NULL || e2 == NULL) 
+      return False;
+   switch (res) {
+   case Vg_LowRes:
+      /* Just compare the top two callers. */
+      vg_ec_cmp2s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
+      return True;
 
+   case Vg_MedRes:
+      /* Just compare the top four callers. */
+      vg_ec_cmp4s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
 
-/* Compare two ExeContexts, just comparing the top two callers. */
-Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp2s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
-   return True;
-}
+      if (VG_(clo_backtrace_size) < 3) return True;
+      if (e1->eips[2] != e2->eips[2]) return False;
 
+      if (VG_(clo_backtrace_size) < 4) return True;
+      if (e1->eips[3] != e2->eips[3]) return False;
+      return True;
 
-/* Compare two ExeContexts, just comparing the top four callers. */
-Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp4s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
+   case Vg_HighRes:
+      vg_ec_cmpAlls++;
+      /* Compare them all -- just do pointer comparison. */
+      if (e1 != e2) return False;
+      return True;
 
-   if (VG_(clo_backtrace_size) < 3) return True;
-   if (e1->eips[2] != e2->eips[2]) return False;
-
-   if (VG_(clo_backtrace_size) < 4) return True;
-   if (e1->eips[3] != e2->eips[3]) return False;
-
-   return True;
+   default:
+      VG_(panic)("VG_(eq_ExeContext): unrecognised VgRes");
+   }
 }
 
 
@@ -156,11 +156,12 @@
 
    In order to be thread-safe, we pass in the thread's %EIP and %EBP.
 */
-ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                  Addr eip, Addr ebp )
+ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                   Addr ebp_min, Addr ebp_max_orig )
 {
    Int         i;
    Addr        eips[VG_DEEPEST_BACKTRACE];
+   Addr        ebp_max;
    Bool        same;
    UInt        hash;
    ExeContext* new_ec;
@@ -173,29 +174,53 @@
 
    /* First snaffle %EIPs from the client's stack into eips[0
       .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail
-      goes cold. */
+      goes cold, which we guess to be when %ebp is not a reasonable
+      stack location.  We also assert that %ebp increases down the chain. */
 
-   for (i = 0; i < VG_(clo_backtrace_size); i++)
+   // Gives shorter stack trace for tests/badjump.c
+   // JRS 2002-aug-16: I don't think this is a big deal; looks ok for
+   // most "normal" backtraces.
+   // NJN 2002-sep-05: traces for pthreaded programs are particularly bad.
+
+   // JRS 2002-sep-17: hack, to round up ebp_max to the end of the
+   // current page, at least.  Dunno if it helps.
+   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
+   ebp_max = (ebp_max_orig + VKI_BYTES_PER_PAGE - 1) 
+                & ~(VKI_BYTES_PER_PAGE - 1);
+   ebp_max -= sizeof(Addr);
+
+   /* Assertion broken before main() is reached in pthreaded programs;  the
+    * offending stack traces only have one item.  --njn, 2002-aug-16 */
+   /* vg_assert(ebp_min <= ebp_max);*/
+
+   /* Checks the stack isn't riduculously big */
+   vg_assert(ebp_min + 4000000 > ebp_max);
+
+   //   VG_(printf)("%p -> %p\n", ebp_max_orig, ebp_max);
+   eips[0] = eip;
+   //   VG_(printf)("\nSNAP: %p .. %p, EBP=%p\n", ebp_min, ebp_max, ebp  );
+   //   VG_(printf)("   : %p\n", eips[0]);
+   /* Get whatever we safely can ... */
+   for (i = 1; i < VG_(clo_backtrace_size); i++) {
+      if (!(ebp_min <= ebp && ebp <= ebp_max)) {
+         //VG_(printf)("... out of range %p\n", ebp);
+         break; /* ebp gone baaaad */
+      }
+      // NJN 2002-sep-17: monotonicity doesn't work -- gives wrong traces...
+      //     if (ebp >= ((UInt*)ebp)[0]) {
+      //   VG_(printf)("nonmonotonic\n");
+      //    break; /* ebp gone nonmonotonic */
+      // }
+      eips[i] = ((UInt*)ebp)[1];  /* ret addr */
+      ebp     = ((UInt*)ebp)[0];  /* old ebp */
+      //VG_(printf)("     %p\n", eips[i]);
+   }
+
+   /* Put zeroes in the rest. */
+   for (;  i < VG_(clo_backtrace_size); i++) {
       eips[i] = 0;
-   
-#  define GET_CALLER(lval)                                        \
-   if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) {          \
-      lval = ((UInt*)ebp)[1];  /* ret addr */                     \
-      ebp  = ((UInt*)ebp)[0];  /* old ebp */                      \
-   } else {                                                       \
-      lval = ebp = 0;                                             \
    }
 
-   if (skip_top_frame) {
-      for (i = 0; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   } else {
-      eips[0] = eip;
-      for (i = 1; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   }
-#  undef GET_CALLER
-
    /* Now figure out if we've seen this one before.  First hash it so
       as to determine the list number. */
 
@@ -228,19 +253,16 @@
 
    if (list != NULL) {
       /* Yay!  We found it.  */
-      VGP_POPCC;
+      VGP_POPCC(VgpExeContext);
       return list;
    }
 
    /* Bummer.  We have to allocate a new context record. */
    vg_ec_totstored++;
 
-   new_ec 
-      = VG_(malloc)( 
-           VG_AR_EXECTXT, 
-           sizeof(struct _ExeContextRec *) 
-              + VG_(clo_backtrace_size) * sizeof(Addr) 
-        );
+   new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, 
+                               sizeof(struct _ExeContext *) 
+                               + VG_(clo_backtrace_size) * sizeof(Addr) );
 
    for (i = 0; i < VG_(clo_backtrace_size); i++)
       new_ec->eips[i] = eips[i];
@@ -248,10 +270,16 @@
    new_ec->next = vg_ec_list[hash];
    vg_ec_list[hash] = new_ec;
 
-   VGP_POPCC;
+   VGP_POPCC(VgpExeContext);
    return new_ec;
 }
 
+ExeContext* VG_(get_ExeContext) ( ThreadState *tst )
+{
+   return VG_(get_ExeContext2)( tst->m_eip, tst->m_ebp, tst->m_esp, 
+                                tst->stack_highest_word );
+}
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_execontext.c ---*/
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 26f1613..e99bfaa 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -35,10 +35,10 @@
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Instruction emission -- turning final uinstrs back   ---*/
@@ -52,7 +52,7 @@
    do this, calls and jmps to fixed addresses must specify the address
    by first loading it into a register, and jump to/call that
    register.  Fortunately, the only jump to a literal is the jump back
-   to vg_dispatch, and only %eax is live then, conveniently.  Ucode
+   to vg_dispatch, and only %eax is live then, conveniently.  UCode
    call insns may only have a register as target anyway, so there's no
    need to do anything fancy for them.
 
@@ -71,19 +71,104 @@
 static Int    emitted_code_used;
 static Int    emitted_code_size;
 
+/* Statistics about C functions called from generated code. */
+static UInt ccalls                 = 0;
+static UInt ccall_reg_saves        = 0;
+static UInt ccall_args             = 0;
+static UInt ccall_arg_setup_instrs = 0;
+static UInt ccall_stack_clears     = 0;
+static UInt ccall_retvals          = 0;
+static UInt ccall_retval_movs      = 0;
+
+/* Statistics about frequency of each UInstr */
+typedef
+   struct {
+      UInt counts;
+      UInt size;
+   } Histogram;
+
+/* Automatically zeroed because it's static. */
+static Histogram histogram[100];     
+
+void VG_(print_ccall_stats)(void)
+{
+   VG_(message)(Vg_DebugMsg,
+                "   ccalls: %u C calls, %u%% saves+restores avoided"
+                " (%d bytes)",
+                ccalls, 
+                100-(UInt)(ccall_reg_saves/(double)(ccalls*3)*100),
+                ((ccalls*3) - ccall_reg_saves)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %u args, avg 0.%d setup instrs each (%d bytes)", 
+                ccall_args, 
+               (UInt)(ccall_arg_setup_instrs/(double)ccall_args*100),
+               (ccall_args - ccall_arg_setup_instrs)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %d%% clear the stack (%d bytes)", 
+               (UInt)(ccall_stack_clears/(double)ccalls*100),
+               (ccalls - ccall_stack_clears)*3);
+   VG_(message)(Vg_DebugMsg,
+                "           %u retvals, %u%% of reg-reg movs avoided (%d bytes)",
+                ccall_retvals,
+                ( ccall_retvals == 0 
+                ? 100
+                : 100-(UInt)(ccall_retval_movs / 
+                             (double)ccall_retvals*100)),
+                (ccall_retvals-ccall_retval_movs)*2);
+}
+
+void VG_(print_UInstr_histogram)(void)
+{
+   Int i, j;
+   UInt total_counts = 0;
+   UInt total_size   = 0;
+   
+   for (i = 0; i < 100; i++) {
+      total_counts += histogram[i].counts;
+      total_size   += histogram[i].size;
+   }
+
+   VG_(printf)("-- UInstr frequencies -----------\n");
+   for (i = 0; i < 100; i++) {
+      if (0 != histogram[i].counts) {
+
+         UInt count_pc = 
+            (UInt)(histogram[i].counts/(double)total_counts*100 + 0.5);
+         UInt size_pc  = 
+            (UInt)(histogram[i].size  /(double)total_size  *100 + 0.5);
+         UInt avg_size =
+            (UInt)(histogram[i].size / (double)histogram[i].counts + 0.5);
+
+         VG_(printf)("%-7s:%8u (%2u%%), avg %2dB (%2u%%) |", 
+                     VG_(nameUOpcode)(True, i), 
+                     histogram[i].counts, count_pc, 
+                     avg_size, size_pc);
+
+         for (j = 0; j < size_pc; j++) VG_(printf)("O");
+         VG_(printf)("\n");
+
+      } else {
+         vg_assert(0 == histogram[i].size);
+      }
+   }
+
+   VG_(printf)("total UInstrs %u, total size %u\n", total_counts, total_size);
+}
+
 static void expandEmittedCode ( void )
 {
    Int    i;
-   UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size);
+   UChar *tmp = VG_(arena_malloc)(VG_AR_JITTER, 2 * emitted_code_size);
    /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */
    for (i = 0; i < emitted_code_size; i++)
       tmp[i] = emitted_code[i];
-   VG_(jitfree)(emitted_code);
+   VG_(arena_free)(VG_AR_JITTER, emitted_code);
    emitted_code = tmp;
    emitted_code_size *= 2;
 }
 
-static __inline__ void emitB ( UInt b )
+/* Local calls will be inlined, cross-module ones not */
+__inline__ void VG_(emitB) ( UInt b )
 {
    if (dis) {
       if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b);
@@ -95,29 +180,26 @@
    emitted_code_used++;
 }
 
-static __inline__ void emitW ( UInt l )
+__inline__ void VG_(emitW) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
 }
 
-static __inline__ void emitL ( UInt l )
+__inline__ void VG_(emitL) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
-   emitB ( (l >> 16) & 0x000000FF );
-   emitB ( (l >> 24) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l >> 16) & 0x000000FF );
+   VG_(emitB) ( (l >> 24) & 0x000000FF );
 }
 
-static __inline__ void newEmit ( void )
+__inline__ void VG_(newEmit) ( void )
 {
    if (dis)
       VG_(printf)("\t       %4d: ", emitted_code_used );
 }
 
-/* Is this a callee-save register, in the normal C calling convention?  */
-#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI)
-
 
 /*----------------------------------------------------*/
 /*--- Addressing modes                             ---*/
@@ -144,8 +226,8 @@
 static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg )
 {
    /* ($ADDR), reg */
-   emitB ( mkModRegRM(0, reg, 5) );
-   emitL ( addr );
+   VG_(emitB) ( mkModRegRM(0, reg, 5) );
+   VG_(emitL) ( addr );
 }
 
 static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg )
@@ -154,26 +236,26 @@
    if (regmem == R_ESP) 
       VG_(panic)("emit_amode_regmem_reg");
    if (regmem == R_EBP) {
-      emitB ( mkModRegRM(1, reg, 5) );
-      emitB ( 0x00 );
+      VG_(emitB) ( mkModRegRM(1, reg, 5) );
+      VG_(emitB) ( 0x00 );
    } else {
-      emitB( mkModRegRM(0, reg, regmem) );
+      VG_(emitB)( mkModRegRM(0, reg, regmem) );
    }
 }
 
-static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_amode_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
    if (regmem == R_ESP)
       VG_(panic)("emit_amode_offregmem_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a large offset */
       /* d32(regmem), reg */
-      emitB ( mkModRegRM(2, reg, regmem) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, regmem) );
+      VG_(emitL) ( off );
    } else {
       /* d8(regmem), reg */
-      emitB ( mkModRegRM(1, reg, regmem) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, regmem) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
@@ -184,27 +266,27 @@
       VG_(panic)("emit_amode_sib_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a 32-bit offset */
-      emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitL) ( off );
    } else {
       /* Use an 8-bit offset */
-      emitB ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
-static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg )
+void VG_(emit_amode_ereg_greg) ( Int e_reg, Int g_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 
@@ -285,23 +367,23 @@
 /*--- v-size (4, or 2 with OSO) insn emitters      ---*/
 /*----------------------------------------------------*/
 
-static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg )
+void VG_(emit_movv_offregmem_reg) ( Int sz, Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n", 
                    nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg )
+void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n", 
                    nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg));
@@ -309,9 +391,9 @@
 
 static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t(%s), %s\n",
@@ -320,40 +402,39 @@
 
 static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2));
 }
 
-static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 )
+void VG_(emit_movv_reg_reg) ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_ereg_greg ( reg2, reg1 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_ereg_greg) ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, %s\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc, 
-                                       UInt lit, Int reg )
+void VG_(emit_nonshiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) {
       /* short form OK */
-      emitB ( 0x83 ); /* Grp1 Ib,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      emitB ( lit & 0x000000FF );
+      VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      VG_(emitB) ( lit & 0x000000FF );
    } else {
-      emitB ( 0x81 ); /* Grp1 Iv,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      if (sz == 2) emitW ( lit ); else emitL ( lit );
+      VG_(emitB) ( 0x81 ); /* Grp1 Iv,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    }
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n", 
@@ -361,13 +442,13 @@
                    lit, nameIReg(sz,reg));
 }
 
-static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg )
+void VG_(emit_shiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xC1 ); /* Grp2 Ib,Ev */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xC1 ); /* Grp2 Ib,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz), 
@@ -376,12 +457,12 @@
 
 static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xD3 ); /* Grp2 CL,Ev */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xD3 ); /* Grp2 CL,Ev */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz) );
@@ -389,11 +470,11 @@
 
 static void emit_shiftopb_cl_stack0 ( Opcode opc )
 {
-   newEmit();
-   emitB ( 0xD2 ); /* Grp2 CL,Eb */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xD2 ); /* Grp2 CL,Eb */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(1) );
@@ -402,28 +483,28 @@
 static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz),
                    off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc, 
+void VG_(emit_nonshiftopv_reg_reg) ( Int sz, Opcode opc, 
                                        Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
 #  if 0
    /* Perfectly correct, but the GNU assembler uses the other form.
       Therefore we too use the other form, to aid verification. */
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
 #  else
-   emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
+   VG_(emitB) ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
    emit_amode_greg_ereg ( reg1, reg2 );
 #  endif
    if (dis)
@@ -432,134 +513,134 @@
                    nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_reg) ( Int sz, UInt lit, Int reg )
 {
    if (lit == 0) {
-      emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg );
+      VG_(emit_nonshiftopv_reg_reg) ( sz, XOR, reg, reg );
       return;
    }
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xB8+reg ); /* MOV imm, Gv */
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xB8+reg ); /* MOV imm, Gv */
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n", 
                    nameISize(sz), lit, nameIReg(sz,reg));
 }
 
-static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg )
+void VG_(emit_unaryopv_reg) ( Int sz, Opcode opc, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    switch (opc) {
       case NEG:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tneg%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case NOT:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnot%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case DEC:
-         emitB ( 0x48 + reg );
+         VG_(emitB) ( 0x48 + reg );
          if (dis)
             VG_(printf)( "\n\t\tdec%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case INC:
-         emitB ( 0x40 + reg );
+         VG_(emitB) ( 0x40 + reg );
          if (dis)
             VG_(printf)( "\n\t\tinc%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopv_reg");
+         VG_(panic)("VG_(emit_unaryopv_reg)");
    }
 }
 
-static void emit_pushv_reg ( Int sz, Int reg )
+void VG_(emit_pushv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x50 + reg );
+   VG_(emitB) ( 0x50 + reg );
    if (dis)
       VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_popv_reg ( Int sz, Int reg )
+void VG_(emit_popv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x58 + reg );
+   VG_(emitB) ( 0x58 + reg );
    if (dis)
       VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_pushl_lit8 ( Int lit8 )
+void VG_(emit_pushl_lit32) ( UInt int32 )
+{  
+   VG_(newEmit)();
+   VG_(emitB) ( 0x68 );
+   VG_(emitL) ( int32 );
+   if (dis)
+      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
+}  
+
+void VG_(emit_pushl_lit8) ( Int lit8 )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x6A );
-   emitB ( (UChar)((UInt)lit8) );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x6A );
+   VG_(emitB) ( (UChar)((UInt)lit8) );
    if (dis)
       VG_(printf)("\n\t\tpushl $%d\n", lit8 );
 }
 
-static void emit_pushl_lit32 ( UInt int32 )
+void VG_(emit_cmpl_zero_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x68 );
-   emitL ( int32 );
-   if (dis)
-      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
-}
-
-static void emit_cmpl_zero_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0x83 );
-   emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emit_amode_ereg_greg) ( reg, 7 /* Grp 3 opcode for CMP */ );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_ECX ( Int reg )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg, R_ECX );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, R_ECX );
    if (dis) 
       VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg));
 }
 
-static void emit_swapl_reg_EAX ( Int reg )
+void VG_(emit_swapl_reg_EAX) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x90 + reg ); /* XCHG Gv,eAX */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x90 + reg ); /* XCHG Gv,eAX */
    if (dis) 
       VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis) 
       VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), 
                   nameIReg(4,reg2));
@@ -567,65 +648,33 @@
 
 static void emit_bswapl_reg ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0xC8 + reg ); /* BSWAP r32 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0xC8 + reg ); /* BSWAP r32 */
    if (dis) 
       VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg));
 }
 
 static void emit_movl_reg_reg ( Int regs, Int regd )
 {
-   newEmit();
-   emitB ( 0x89 ); /* MOV Gv,Ev */
-   emit_amode_ereg_greg ( regd, regs );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x89 ); /* MOV Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( regd, regs );
    if (dis) 
       VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd));
 }
 
-static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_offregmem) ( Int sz, UInt lit, Int off, Int memreg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 );
+      VG_(emitB) ( 0x66 );
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $0x%x, %s\n", nameISize(sz), 
-                                            lit, nameIReg(sz,reg));
-}
-
-static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", 
-                  nameISize(sz), lit, off, nameIReg(4,reg) );
-}
-
-static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xC7 ); /* Grp11 Ev */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(emitB) ( 0xC7 ); /* Grp11 Ev */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", 
                    nameISize(sz), lit, off, nameIReg(4,memreg) );
@@ -638,35 +687,35 @@
 
 /* There is some doubt as to whether C6 (Grp 11) is in the
    486 insn set.  ToDo: investigate. */
-static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   emitB ( 0xC6 ); /* Grp11 Eb */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   emitB ( lit );
+void VG_(emit_movb_lit_offregmem) ( UInt lit, Int off, Int memreg )
+{                                     
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC6 ); /* Grp11 Eb */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   VG_(emitB) ( lit ); 
    if (dis)
       VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", 
                    lit, off, nameIReg(4,memreg) );
-}
-
+}              
+              
 static void emit_nonshiftopb_offregmem_reg ( Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), 
                    nameIReg(1,reg));
 }
 
-static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg )
+void VG_(emit_movb_reg_offregmem) ( Int reg, Int off, Int areg )
 {
    /* Could do better when reg == %al. */
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", 
                    nameIReg(1,reg), off, nameIReg(4,areg));
@@ -674,9 +723,9 @@
 
 static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t%s, %s\n", 
                    VG_(nameUOpcode)(False,opc),
@@ -685,8 +734,8 @@
 
 static void emit_movb_reg_regmem ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), 
@@ -695,10 +744,10 @@
 
 static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0x80 ); /* Grp1 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-   emitB ( lit & 0x000000FF );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x80 ); /* Grp1 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+   VG_(emitB) ( lit & 0x000000FF );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc),
                                              lit, nameIReg(1,reg));
@@ -706,69 +755,68 @@
 
 static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xC0 ); /* Grp2 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC0 ); /* Grp2 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc),
                    lit, nameIReg(1,reg));
 }
 
-static void emit_unaryopb_reg ( Opcode opc, Int reg )
+void VG_(emit_unaryopb_reg) ( Opcode opc, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    switch (opc) {
       case INC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(INC) );
          if (dis)
             VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg));
          break;
       case DEC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(DEC) );
          if (dis)
             VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg));
          break;
       case NOT:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg));
          break;
       case NEG:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopb_reg");
+         VG_(panic)("VG_(emit_unaryopb_reg)");
    }
 }
 
-static void emit_testb_lit_reg ( UInt lit, Int reg )
+void VG_(emit_testb_lit_reg) ( UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xF6 ); /* Grp3 Eb */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xF6 ); /* Grp3 Eb */
+   VG_(emit_amode_ereg_greg) ( reg, 0 /* Grp3 subopcode for TEST */ );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg));
 }
 
-
 /*----------------------------------------------------*/
 /*--- zero-extended load emitters                  ---*/
 /*----------------------------------------------------*/
 
-static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
-   emit_amode_offregmem_reg ( off, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", 
                    off, nameIReg(4,regmem), nameIReg(4,reg));
@@ -776,19 +824,19 @@
 
 static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), 
                                                nameIReg(4,reg2));
 }
 
-static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg )
+void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n",
                    off, nameIReg(4,areg), nameIReg(4,reg));
@@ -796,8 +844,8 @@
 
 static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), 
@@ -811,9 +859,9 @@
 static void emit_get_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
 }
@@ -821,9 +869,9 @@
 static void emit_put_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
 }
@@ -831,9 +879,9 @@
 static void emit_fpu_no_mem ( UChar first_byte, 
                               UChar second_byte )
 {
-   newEmit();
-   emitB ( first_byte );
-   emitB ( second_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
+   VG_(emitB) ( second_byte );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte );
@@ -843,8 +891,8 @@
                               UChar second_byte_masked, 
                               Int reg )
 {
-   newEmit();
-   emitB ( first_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
    emit_amode_regmem_reg ( reg, second_byte_masked >> 3 );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", 
@@ -857,27 +905,26 @@
 /*--- misc instruction emitters                    ---*/
 /*----------------------------------------------------*/
 
-static void emit_call_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0xFF ); /* Grp5 */
-   emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) );
-   if (dis)
+void VG_(emit_call_reg) ( Int reg )
+{           
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* Grp5 */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp5opcode(CALLM) );
+   if (dis) 
       VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) );
-}
-
-
+}              
+         
 static void emit_call_star_EBP_off ( Int byte_off )
 {
-  newEmit();
+  VG_(newEmit)();
   if (byte_off < -128 || byte_off > 127) {
-     emitB ( 0xFF );
-     emitB ( 0x95 );
-     emitL ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x95 );
+     VG_(emitL) ( byte_off );
   } else {
-     emitB ( 0xFF );
-     emitB ( 0x55 );
-     emitB ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x55 );
+     VG_(emitB) ( byte_off );
   }
   if (dis)
      VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off );
@@ -887,24 +934,24 @@
 static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x83 ); /* Grp1 Ib,Ev */
-   emit_amode_offregmem_reg ( off, regmem, 
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, 
                               0 /* Grp1 subopcode for ADD */ );
-   emitB ( lit8 & 0xFF );
+   VG_(emitB) ( lit8 & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, 
                                                nameIReg(4,regmem));
 }
 
 
-static void emit_add_lit_to_esp ( Int lit )
+void VG_(emit_add_lit_to_esp) ( Int lit )
 {
-   if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp");
-   newEmit();
-   emitB ( 0x83 );
-   emitB ( 0xC4 );
-   emitB ( lit & 0xFF );
+   if (lit < -128 || lit > 127) VG_(panic)("VG_(emit_add_lit_to_esp)");
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emitB) ( 0xC4 );
+   VG_(emitB) ( lit & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit );
 }
@@ -914,11 +961,11 @@
 {
    /* movb %al, 0(%esp) */
    /* 88442400              movb    %al, 0(%esp) */
-   newEmit();
-   emitB ( 0x88 );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" );
 }
@@ -927,11 +974,11 @@
 {
    /* movb 0(%esp), %al */
    /* 8A442400              movb    0(%esp), %al */
-   newEmit();
-   emitB ( 0x8A );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8A );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" );
 }
@@ -940,12 +987,12 @@
 /* Emit a jump short with an 8-bit signed offset.  Note that the
    offset is that which should be added to %eip once %eip has been
    advanced over this insn.  */
-static void emit_jcondshort_delta ( Condcode cond, Int delta )
+void VG_(emit_jcondshort_delta) ( Condcode cond, Int delta )
 {
    vg_assert(delta >= -128 && delta <= 127);
-   newEmit();
-   emitB ( 0x70 + (UInt)cond );
-   emitB ( (UChar)delta );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x70 + (UInt)cond );
+   VG_(emitB) ( (UChar)delta );
    if (dis)
       VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", 
                    VG_(nameCondcode)(cond), delta );
@@ -955,11 +1002,11 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0xFF ); /* PUSHL off(%ebp) */
-   emitB ( 0x75 );
-   emitB ( off );
-   emitB ( 0x9D ); /* POPFL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* PUSHL off(%ebp) */
+   VG_(emitB) ( 0x75 );
+   VG_(emitB) ( off );
+   VG_(emitB) ( 0x9D ); /* POPFL */
    if (dis)
       VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off );
 }
@@ -968,20 +1015,20 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0x9C ); /* PUSHFL */
-   emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */
-   emitB ( 0x45 );
-   emitB ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x9C ); /* PUSHFL */
+   VG_(emitB) ( 0x8F ); /* POPL vg_m_state.m_eflags */
+   VG_(emitB) ( 0x45 );
+   VG_(emitB) ( off );
    if (dis)
       VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off );
 }
 
 static void emit_setb_reg ( Int reg, Condcode cond )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond );
-   emit_amode_ereg_greg ( reg, 0 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0x90 + (UChar)cond );
+   VG_(emit_amode_ereg_greg) ( reg, 0 );
    if (dis)
       VG_(printf)("\n\t\tset%s %s\n", 
                   VG_(nameCondcode)(cond), nameIReg(1,reg));
@@ -989,33 +1036,33 @@
 
 static void emit_ret ( void )
 {
-   newEmit();
-   emitB ( 0xC3 ); /* RET */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC3 ); /* RET */
    if (dis)
       VG_(printf)("\n\t\tret\n");
 }
 
-static void emit_pushal ( void )
+void VG_(emit_pushal) ( void )
 {
-   newEmit();
-   emitB ( 0x60 ); /* PUSHAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x60 ); /* PUSHAL */
    if (dis)
       VG_(printf)("\n\t\tpushal\n");
 }
 
-static void emit_popal ( void )
+void VG_(emit_popal) ( void )
 {
-   newEmit();
-   emitB ( 0x61 ); /* POPAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x61 ); /* POPAL */
    if (dis)
       VG_(printf)("\n\t\tpopal\n");
 }
 
 static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
-   emit_amode_offregmem_reg ( (Int)lit, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
+   VG_(emit_amode_offregmem_reg) ( (Int)lit, regmem, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s), %s\n",
                   lit, nameIReg(4,regmem), nameIReg(4,reg) );
@@ -1024,8 +1071,8 @@
 static void emit_lea_sib_reg ( UInt lit, Int scale,
 			       Int regbase, Int regindex, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
    emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n",
@@ -1034,17 +1081,51 @@
                        nameIReg(4,reg) );
 }
 
-static void emit_AMD_prefetch_reg ( Int reg )
+void VG_(emit_AMD_prefetch_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0x0D );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0x0D );
    emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ );
    if (dis)
       VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) );
 }
 
 /*----------------------------------------------------*/
+/*--- Helper offset -> addr translation            ---*/
+/*----------------------------------------------------*/
+
+/* Finds the baseBlock offset of a skin-specified helper.
+ * Searches through compacts first, then non-compacts. */
+Int VG_(helper_offset)(Addr a)
+{
+   Int i;
+
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      if (VG_(compact_helper_addrs)[i] == a)
+         return VG_(compact_helper_offsets)[i];
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      if (VG_(noncompact_helper_addrs)[i] == a)
+         return VG_(noncompact_helper_offsets)[i];
+
+   /* Shouldn't get here */
+   VG_(printf)(
+      "\nCouldn't find offset of helper from its address (%p).\n"
+      "A helper function probably used hasn't been registered?\n\n", a);
+
+   VG_(printf)("      compact helpers: ");
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      VG_(printf)("%p ", VG_(compact_helper_addrs)[i]);
+
+   VG_(printf)("\n  non-compact helpers: ");
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      VG_(printf)("%p ", VG_(noncompact_helper_addrs)[i]);
+
+   VG_(printf)("\n");
+   VG_(skin_error)("Unfound helper");
+}
+
+/*----------------------------------------------------*/
 /*--- Instruction synthesisers                     ---*/
 /*----------------------------------------------------*/
 
@@ -1057,8 +1138,7 @@
 /* Synthesise a call to *baseBlock[offset], ie,
    call * (4 x offset)(%ebp).
 */
-static void synth_call_baseBlock_method ( Bool ensure_shortform, 
-                                          Int word_offset )
+void VG_(synth_call) ( Bool ensure_shortform, Int word_offset )
 {
    vg_assert(word_offset >= 0);
    vg_assert(word_offset < VG_BASEBLOCK_WORDS);
@@ -1067,42 +1147,237 @@
    emit_call_star_EBP_off ( 4 * word_offset );
 }
 
-static void synth_ccall_saveRegs ( void )
+static void maybe_emit_movl_reg_reg ( UInt src, UInt dst )
 {
-   emit_pushv_reg ( 4, R_EAX ); 
-   emit_pushv_reg ( 4, R_ECX ); 
-   emit_pushv_reg ( 4, R_EDX ); 
+   if (src != dst) {
+      VG_(emit_movv_reg_reg) ( 4, src, dst );
+      ccall_arg_setup_instrs++;
+   }
 }
+
+/* 'maybe' because it is sometimes skipped eg. for "movl %eax,%eax" */
+static void maybe_emit_movl_litOrReg_reg ( UInt litOrReg, Tag tag, UInt reg )
+{
+   if (RealReg == tag) {
+      maybe_emit_movl_reg_reg ( litOrReg, reg );
+   } else if (Literal == tag) {
+      VG_(emit_movv_lit_reg) ( 4, litOrReg, reg );
+      ccall_arg_setup_instrs++;
+   }
+   else
+      VG_(panic)("emit_movl_litOrReg_reg: unexpected tag");
+}
+
+static
+void emit_swapl_arg_regs ( UInt reg1, UInt reg2 )
+{
+   if        (R_EAX == reg1) {
+      VG_(emit_swapl_reg_EAX) ( reg2 );
+   } else if (R_EAX == reg2) {
+      VG_(emit_swapl_reg_EAX) ( reg1 );
+   } else {
+      emit_swapl_reg_reg ( reg1, reg2 );
+   }
+   ccall_arg_setup_instrs++;
+}
+
+static
+void emit_two_regs_args_setup ( UInt src1, UInt src2, UInt dst1, UInt dst2)
+{
+   if        (dst1 != src2) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+
+   } else if (dst2 != src1) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+
+   } else {
+      /* swap to break cycle */
+      emit_swapl_arg_regs ( dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_args_setup ( UInt src1, UInt src2, UInt src3,
+                                  UInt dst1, UInt dst2, UInt dst3)
+{
+   if        (dst1 != src2 && dst1 != src3) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      emit_two_regs_args_setup ( src2, src3, dst2, dst3 );
+
+   } else if (dst2 != src1 && dst2 != src3) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      emit_two_regs_args_setup ( src1, src3, dst1, dst3 );
+
+   } else if (dst3 != src1 && dst3 != src2) {
+      maybe_emit_movl_reg_reg ( src3, dst3 );
+      emit_two_regs_args_setup ( src1, src2, dst1, dst2 );
+      
+   } else {
+      /* break cycle */
+      if        (dst1 == src2 && dst2 == src3 && dst3 == src1) {
+         emit_swapl_arg_regs ( dst1, dst2 );
+         emit_swapl_arg_regs ( dst1, dst3 );
+
+      } else if (dst1 == src3 && dst2 == src1 && dst3 == src2) {
+         emit_swapl_arg_regs ( dst1, dst3 );
+         emit_swapl_arg_regs ( dst1, dst2 );
+
+      } else {
+         VG_(panic)("impossible 3-cycle");
+      }
+   }
+}
+
+static
+void emit_two_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                        UInt src1, UInt src2,
+                                        UInt dst1, UInt dst2)
+{
+   /* If either are lits, order doesn't matter */
+   if (Literal == tagv[src1] || Literal == tagv[src2]) {
+      maybe_emit_movl_litOrReg_reg ( argv[src1], tagv[src1], dst1 );
+      maybe_emit_movl_litOrReg_reg ( argv[src2], tagv[src2], dst2 );
+
+   } else {
+      emit_two_regs_args_setup ( argv[src1], argv[src2], dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                          UInt src1, UInt src2, UInt src3,
+                                          UInt dst1, UInt dst2, UInt dst3)
+{
+   // SSS: fix this eventually -- make STOREV use two RealRegs?
+   /* Not supporting literals for 3-arg C functions -- they're only used
+      by STOREV which has 2 args */
+   vg_assert(RealReg == tagv[src1] &&
+             RealReg == tagv[src2] &&
+             RealReg == tagv[src3]);
+   emit_three_regs_args_setup ( argv[src1], argv[src2], argv[src3],
+                                dst1, dst2, dst3 );
+}
+
+/* Synthesise a call to a C function `fn' (which must be registered in
+   baseBlock) doing all the reg saving and arg handling work.
+ 
+   WARNING:  a UInstr should *not* be translated with synth_ccall followed
+   by some other x86 assembly code;  vg_liveness_analysis() doesn't expect
+   such behaviour and everything will fall over.
+ */
+void VG_(synth_ccall) ( Addr fn, Int argc, Int regparms_n, UInt argv[],
+                        Tag tagv[], Int ret_reg,
+                        RRegSet regs_live_before, RRegSet regs_live_after )
+{
+   Int  i;
+   Int  stack_used = 0;
+   Bool preserve_eax, preserve_ecx, preserve_edx;
+
+   vg_assert(0 <= regparms_n && regparms_n <= 3);
+
+   ccalls++;
+
+   /* If %e[acd]x is live before and after the C call, save/restore it.
+      Unless the return values clobbers the reg;  in this case we must not
+      save/restore the reg, because the restore would clobber the return
+      value.  (Before and after the UInstr really constitute separate live
+      ranges, but you miss this if you don't consider what happens during
+      the UInstr.) */
+#  define PRESERVE_REG(realReg)   \
+   (IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_before) &&   \
+    IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_after)  &&   \
+    ret_reg != realReg)
+
+   preserve_eax = PRESERVE_REG(R_EAX);
+   preserve_ecx = PRESERVE_REG(R_ECX);
+   preserve_edx = PRESERVE_REG(R_EDX);
+
+#  undef PRESERVE_REG
+
+   /* Save caller-save regs as required */
+   if (preserve_eax) { VG_(emit_pushv_reg) ( 4, R_EAX ); ccall_reg_saves++; }
+   if (preserve_ecx) { VG_(emit_pushv_reg) ( 4, R_ECX ); ccall_reg_saves++; }
+   if (preserve_edx) { VG_(emit_pushv_reg) ( 4, R_EDX ); ccall_reg_saves++; }
+
+   /* Args are passed in two groups: (a) via stack (b) via regs.  regparms_n
+      is the number of args passed in regs (maximum 3 for GCC on x86). */
+
+   ccall_args += argc;
    
-static void synth_ccall_pushOneArg ( Int r1 )
-{
-   emit_pushv_reg ( 4, r1 );
-}
+   /* First push stack args (RealRegs or Literals) in reverse order. */
+   for (i = argc-1; i >= regparms_n; i--) {
+      switch (tagv[i]) {
+      case RealReg:
+         VG_(emit_pushv_reg) ( 4, argv[i] );
+         break;
+      case Literal:
+         /* Use short form of pushl if possible. */
+         if (argv[i] == VG_(extend_s_8to32) ( argv[i] ))
+            VG_(emit_pushl_lit8) ( VG_(extend_s_8to32)(argv[i]) );
+         else
+            VG_(emit_pushl_lit32)( argv[i] );
+         break;
+      default:
+         VG_(printf)("tag=%d\n", tagv[i]);
+         VG_(panic)("VG_(synth_ccall): bad tag");
+      }
+      stack_used += 4;
+      ccall_arg_setup_instrs++;
+   }
 
-static void synth_ccall_pushTwoArgs ( Int r1, Int r2 )
-{
-   /* must push in reverse order */
-   emit_pushv_reg ( 4, r2 );
-   emit_pushv_reg ( 4, r1 );
-}
+   /* Then setup args in registers (arg[123] --> %e[adc]x;  note order!).
+      If moving values between registers, be careful not to clobber any on
+      the way.  Happily we can use xchgl to swap registers.
+   */
+   switch (regparms_n) {
 
-/* Synthesise a call to *baseBlock[offset], ie,
-   call * (4 x offset)(%ebp) with arguments
-*/
-static void synth_ccall_call_clearStack_restoreRegs ( Int word_offset, 
-                                                      UInt n_args_bytes )
-{
-   vg_assert(word_offset >= 0);
-   vg_assert(word_offset < VG_BASEBLOCK_WORDS);
-   vg_assert(n_args_bytes <= 12);           /* Max 3 word-sized args */
-   vg_assert(0 == (n_args_bytes & 0x3));    /* Divisible by four */
+   /* Trickiest.  Args passed in %eax, %edx, and %ecx. */
+   case 3:
+      emit_three_regs_or_lits_args_setup ( argv, tagv, 0, 1, 2,
+                                           R_EAX, R_EDX, R_ECX );
+      break;
 
-   emit_call_star_EBP_off ( 4 * word_offset );
-   if ( 0 != n_args_bytes )
-      emit_add_lit_to_esp ( n_args_bytes );
-   emit_popv_reg ( 4, R_EDX ); 
-   emit_popv_reg ( 4, R_ECX ); 
-   emit_popv_reg ( 4, R_EAX ); 
+   /* Less-tricky.  Args passed in %eax and %edx. */
+   case 2:
+      emit_two_regs_or_lits_args_setup ( argv, tagv, 0, 1, R_EAX, R_EDX );
+      break;
+      
+   /* Easy.  Just move arg1 into %eax (if not already in there). */
+   case 1:  
+      maybe_emit_movl_litOrReg_reg ( argv[0], tagv[0], R_EAX );
+      break;
+
+   case 0:
+      break;
+
+   default:
+      VG_(panic)("VG_(synth_call): regparms_n value not in range 0..3");
+   }
+   
+   /* Call the function */
+   VG_(synth_call) ( False, VG_(helper_offset) ( fn ) );
+
+   /* Clear any args from stack */
+   if (0 != stack_used) {
+      VG_(emit_add_lit_to_esp) ( stack_used );
+      ccall_stack_clears++;
+   }
+
+   /* Move return value into ret_reg if necessary and not already there */
+   if (INVALID_REALREG != ret_reg) {
+      ccall_retvals++;
+      if (R_EAX != ret_reg) {
+         VG_(emit_movv_reg_reg) ( 4, R_EAX, ret_reg );
+         ccall_retval_movs++;
+      }
+   }
+
+   /* Restore live caller-save regs as required */
+   if (preserve_edx) VG_(emit_popv_reg) ( 4, R_EDX ); 
+   if (preserve_ecx) VG_(emit_popv_reg) ( 4, R_ECX ); 
+   if (preserve_eax) VG_(emit_popv_reg) ( 4, R_EAX ); 
 }
 
 static void load_ebp_from_JmpKind ( JmpKind jmpkind )
@@ -1110,15 +1385,15 @@
    switch (jmpkind) {
       case JmpBoring: 
          break;
-      case JmpCall:
       case JmpRet: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP );
+         break;
+      case JmpCall:
          break;
       case JmpSyscall: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
          break;
       case JmpClientReq: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
          break;
       default: 
          VG_(panic)("load_ebp_from_JmpKind");
@@ -1133,7 +1408,7 @@
 {
    load_ebp_from_JmpKind ( jmpkind );
    if (reg != R_EAX)
-      emit_movv_reg_reg ( 4, reg, R_EAX );
+      VG_(emit_movv_reg_reg) ( 4, reg, R_EAX );
    emit_ret();
 }
 
@@ -1142,7 +1417,7 @@
 static void synth_jmp_lit ( Addr addr, JmpKind jmpkind )
 {
    load_ebp_from_JmpKind ( jmpkind );
-   emit_movv_lit_reg ( 4, addr, R_EAX );
+   VG_(emit_movv_lit_reg) ( 4, addr, R_EAX );
    emit_ret();
 }
 
@@ -1163,7 +1438,7 @@
    6                    xyxyxy:
   */
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 5+1 );
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1176,8 +1451,8 @@
       000a C3                    ret
       next:
    */
-   emit_cmpl_zero_reg ( reg );
-   emit_jcondshort_delta ( CondNZ, 5+1 );
+   VG_(emit_cmpl_zero_reg) ( reg );
+   VG_(emit_jcondshort_delta) ( CondNZ, 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1186,7 +1461,7 @@
 {
    /* Load the zero-extended literal into reg, at size l,
       regardless of the request size. */
-   emit_movv_lit_reg ( 4, lit, reg );
+   VG_(emit_movv_lit_reg) ( 4, lit, reg );
 }
 
 
@@ -1204,9 +1479,9 @@
 static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) 
 {
    switch (size) {
-      case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break;
-      case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break;
-      case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break;
+      case 4: VG_(emit_movv_offregmem_reg) ( 4, off, areg, reg ); break;
+      case 2: VG_(emit_movzwl_offregmem_reg) ( off, areg, reg ); break;
+      case 1: VG_(emit_movzbl_offregmem_reg) ( off, areg, reg ); break;
       default: VG_(panic)("synth_mov_offregmem_reg");
    }  
 }
@@ -1216,15 +1491,15 @@
                                       Int off, Int areg )
 {
    switch (size) {
-      case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break;
-      case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break;
+      case 4: VG_(emit_movv_reg_offregmem) ( 4, reg, off, areg ); break;
+      case 2: VG_(emit_movv_reg_offregmem) ( 2, reg, off, areg ); break;
       case 1: if (reg < 4) {
-                 emit_movb_reg_offregmem ( reg, off, areg ); 
+                 VG_(emit_movb_reg_offregmem) ( reg, off, areg ); 
               }
               else {
-                 emit_swapl_reg_EAX ( reg );
-                 emit_movb_reg_offregmem ( R_AL, off, areg );
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
+                 VG_(emit_movb_reg_offregmem) ( R_AL, off, areg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_mov_reg_offregmem");
@@ -1261,23 +1536,23 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 4, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 4, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 2, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 2, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, reg );
+                 VG_(emit_unaryopb_reg) ( opcode, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, R_AL );
+                 VG_(emit_unaryopb_reg) ( opcode, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_unaryop_reg");
@@ -1293,11 +1568,11 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 4, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 2, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: { /* Horrible ... */
@@ -1377,11 +1652,11 @@
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg );
             if (wr_cc) emit_put_eflags();
          } else {
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
             if (rd_cc) emit_get_eflags();
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL );
             if (wr_cc) emit_put_eflags();
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
          }
          break;
       default: 
@@ -1396,11 +1671,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1408,11 +1683,11 @@
                  emit_nonshiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_nonshiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_nonshiftop_lit_reg");
@@ -1424,19 +1699,19 @@
 {
    switch (size) {
       case 4: 
-         emit_pushv_reg ( 4, reg ); 
+         VG_(emit_pushv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_pushv_reg ( 2, reg ); 
+         VG_(emit_pushv_reg) ( 2, reg ); 
          break;
       /* Pray that we don't have to generate this really cruddy bit of
          code very often.  Could do better, but can I be bothered? */
       case 1: 
          vg_assert(reg != R_ESP); /* duh */
-         emit_add_lit_to_esp(-1);
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         VG_(emit_add_lit_to_esp)(-1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_AL_zeroESPmem();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          break;
      default: 
          VG_(panic)("synth_push_reg");
@@ -1448,18 +1723,18 @@
 {
    switch (size) {
       case 4: 
-         emit_popv_reg ( 4, reg ); 
+         VG_(emit_popv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_popv_reg ( 2, reg ); 
+         VG_(emit_popv_reg) ( 2, reg ); 
          break;
       case 1:
          /* Same comment as above applies. */
          vg_assert(reg != R_ESP); /* duh */
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_zeroESPmem_AL();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
-         emit_add_lit_to_esp(1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
+         VG_(emit_add_lit_to_esp)(1);
          break;
       default: VG_(panic)("synth_pop_reg");
    }
@@ -1491,11 +1766,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1503,11 +1778,11 @@
                  emit_shiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_shiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_shiftop_lit_reg");
@@ -1521,9 +1796,9 @@
    if (reg < 4) {
       emit_setb_reg ( reg, cond );
    } else {
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
       emit_setb_reg ( R_AL, cond );
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
    }
 }
 
@@ -1555,42 +1830,18 @@
 static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst )
 {
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 
                            2 /* length of the next insn */ );
    emit_movl_reg_reg ( src, dst );
 }
 
 
-/* Synthesise a minimal test (and which discards result) of reg32
-   against lit.  It's always safe do simply
-      emit_testv_lit_reg ( 4, lit, reg32 )
-   but we try to do better when possible.
-*/
-static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 )
-{
-   if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) {
-      /* We can get away with a byte insn. */
-      emit_testb_lit_reg ( lit, reg32 );
-   }
-   else 
-   if ((lit & 0xFFFF0000) == 0) {
-      /* Literal fits in 16 bits; do a word insn. */
-      emit_testv_lit_reg ( 2, lit, reg32 );
-   }
-   else {
-      /* Totally general ... */
-      emit_testv_lit_reg ( 4, lit, reg32 );
-   }
-}
-
-
 /*----------------------------------------------------*/
 /*--- Top level of the uinstr -> x86 translation.  ---*/
 /*----------------------------------------------------*/
 
 /* Return the byte offset from %ebp (ie, into baseBlock)
    for the specified ArchReg or SpillNo. */
-
 static Int spillOrArchOffset ( Int size, Tag tag, UInt value )
 {
    if (tag == SpillNo) {
@@ -1621,14 +1872,15 @@
    VG_(panic)("spillOrArchOffset");
 }
 
-
 static Int eflagsOffset ( void )
 {
    return 4 * VGOFF_(m_eflags);
 }
 
 
-static Int shadowOffset ( Int arch )
+/* Return the byte offset from %ebp (ie, into baseBlock)
+   for the specified shadow register */
+Int VG_(shadowRegOffset) ( Int arch )
 {
    switch (arch) {
       case R_EAX: return 4 * VGOFF_(sh_eax);
@@ -1643,539 +1895,44 @@
    }
 }
 
-
-static Int shadowFlagsOffset ( void )
+Int VG_(shadowFlagsOffset) ( void )
 {
    return 4 * VGOFF_(sh_eflags);
 }
 
 
-static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg )
-{
-   Int i, j, helper_offw;
-   Int pushed[VG_MAX_REALREGS+2];
-   Int n_pushed;
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_LOADV4); break;
-      case 2: helper_offw = VGOFF_(helperc_LOADV2); break;
-      case 1: helper_offw = VGOFF_(helperc_LOADV1); break;
-      default: VG_(panic)("synth_LOADV");
-   }
-   n_pushed = 0;
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if (j == tv_reg || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-      pushed[n_pushed++] = j;
-   }
-   emit_pushv_reg ( 4, a_reg );
-   pushed[n_pushed++] = a_reg;
-   vg_assert(n_pushed <= VG_MAX_REALREGS+1);
-
-   synth_call_baseBlock_method ( False, helper_offw );
-   /* Result is in %eax; we need to get it to tv_reg. */
-   if (tv_reg != R_EAX)
-      emit_movv_reg_reg ( 4, R_EAX, tv_reg );
-
-   while (n_pushed > 0) {
-      n_pushed--;
-      if (pushed[n_pushed] == tv_reg) {
-         emit_add_lit_to_esp ( 4 );
-      } else {
-         emit_popv_reg ( 4, pushed[n_pushed] );
-      }
-   }
-}
-
-
-static void synth_STOREV ( Int sz,
-                           Int tv_tag, Int tv_val,
-                           Int a_reg )
-{
-   Int i, j, helper_offw;
-   vg_assert(tv_tag == RealReg || tv_tag == Literal);
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_STOREV4); break;
-      case 2: helper_offw = VGOFF_(helperc_STOREV2); break;
-      case 1: helper_offw = VGOFF_(helperc_STOREV1); break;
-      default: VG_(panic)("synth_STOREV");
-   }
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-   }
-   if (tv_tag == RealReg) {
-      emit_pushv_reg ( 4, tv_val );
-   } else {
-     if (tv_val == VG_(extend_s_8to32)(tv_val))
-        emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) );
-     else
-        emit_pushl_lit32(tv_val);
-   }
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_popv_reg ( 4, a_reg );
-   if (tv_tag == RealReg) {
-      emit_popv_reg ( 4, tv_val );
-   } else {
-      emit_add_lit_to_esp ( 4 );
-   }
-   for (i = VG_MAX_REALREGS-1; i >= 0; i--) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_popv_reg ( 4, j );
-   }
-}
-
 
 static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg )
 {
    if (sz_src == 1 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 24, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 24, reg );
    }
    else if (sz_src == 2 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 16, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 16, reg );
    }
    else if (sz_src == 1 && sz_dst == 2) {
-      emit_shiftopv_lit_reg ( 2, SHL, 8, reg );
-      emit_shiftopv_lit_reg ( 2, SAR, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SHL, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SAR, 8, reg );
    }
    else
       VG_(panic)("synth_WIDEN");
 }
 
 
-static void synth_SETV ( Int sz, Int reg )
+static void synth_handle_esp_assignment ( Int i, Int reg,
+                                          RRegSet regs_live_before,
+                                          RRegSet regs_live_after )
 {
-   UInt val;
-   switch (sz) {
-      case 4: val = 0x00000000; break;
-      case 2: val = 0xFFFF0000; break;
-      case 1: val = 0xFFFFFF00; break;
-      case 0: val = 0xFFFFFFFE; break;
-      default: VG_(panic)("synth_SETV");
-   }
-   emit_movv_lit_reg ( 4, val, reg );
+   UInt argv[] = { reg };
+   Tag  tagv[] = { RealReg };
+
+   VG_(synth_ccall) ( (Addr) VG_(handle_esp_assignment), 1, 1, argv, tagv, 
+                      INVALID_REALREG, regs_live_before, regs_live_after);
 }
 
 
-static void synth_TESTV ( Int sz, Int tag, Int val )
-{
-   vg_assert(tag == ArchReg || tag == RealReg);
-   if (tag == ArchReg) {
-      switch (sz) {
-         case 4: 
-            emit_testv_lit_offregmem ( 
-               4, 0xFFFFFFFF, shadowOffset(val), R_EBP );
-            break;
-         case 2: 
-            emit_testv_lit_offregmem ( 
-               4, 0x0000FFFF, shadowOffset(val), R_EBP );
-            break;
-         case 1:
-            if (val < 4) {
-               emit_testv_lit_offregmem ( 
-                  4, 0x000000FF, shadowOffset(val), R_EBP );
-            } else {
-               emit_testv_lit_offregmem ( 
-                  4, 0x0000FF00, shadowOffset(val-4), R_EBP );
-            }
-            break;
-         case 0: 
-            /* should never happen */
-         default: 
-            VG_(panic)("synth_TESTV(ArchReg)");
-      }
-   } else {
-      switch (sz) {
-         case 4:
-            /* Works, but holds the entire 32-bit literal, hence
-               generating a 6-byte insn.  We want to know if any bits
-               in the reg are set, but since this is for the full reg,
-               we might as well compare it against zero, which can be
-               done with a shorter insn. */
-            /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */
-            emit_cmpl_zero_reg ( val );
-            break;
-         case 2:
-            synth_minimal_test_lit_reg ( 0x0000FFFF, val );
-            break;
-         case 1:
-            synth_minimal_test_lit_reg ( 0x000000FF, val );
-            break;
-         case 0:
-            synth_minimal_test_lit_reg ( 0x00000001, val );
-            break;
-         default: 
-            VG_(panic)("synth_TESTV(RealReg)");
-      }
-   }
-   emit_jcondshort_delta ( CondZ, 3 );
-   synth_call_baseBlock_method (
-      True, /* needed to guarantee that this insn is indeed 3 bytes long */
-      (sz==4 ? VGOFF_(helper_value_check4_fail)
-             : (sz==2 ? VGOFF_(helper_value_check2_fail)
-                      : sz == 1 ? VGOFF_(helper_value_check1_fail)
-                                : VGOFF_(helper_value_check0_fail)))
-   );
-}
-
-
-static void synth_GETV ( Int sz, Int arch, Int reg )
-{
-   /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */
-   switch (sz) {
-      case 4: 
-         emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg );
-         break;
-      case 2: 
-         emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg );
-         break;
-      case 1: 
-         if (arch < 4) {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         } else {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg );
-         }
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg );
-         break;
-      default: 
-         VG_(panic)("synth_GETV");
-   }
-}
-
-
-static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch )
-{
-   if (srcTag == Literal) {
-     /* PUTV with a Literal is only ever used to set the corresponding
-        ArchReg to `all valid'.  Should really be a kind of SETV. */
-      UInt lit = lit_or_reg;
-      switch (sz) {
-         case 4:
-            vg_assert(lit == 0x00000000);
-            emit_movv_lit_offregmem ( 4, 0x00000000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            vg_assert(lit == 0xFFFF0000);
-            emit_movv_lit_offregmem ( 2, 0x0000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            vg_assert(lit == 0xFFFFFF00);
-            if (arch < 4) {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch), R_EBP );
-            } else {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(lit)");
-      }
-
-   } else {
-
-      UInt reg;
-      vg_assert(srcTag == RealReg);
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-         reg = R_EAX;
-      } else {
-         reg = lit_or_reg;
-      }
-
-      if (sz == 1) vg_assert(reg < 4);
-
-      switch (sz) {
-         case 4:
-            emit_movv_reg_offregmem ( 4, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            emit_movv_reg_offregmem ( 2, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            if (arch < 4) {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch), R_EBP );
-	    } else {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(reg)");
-      }
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-      }
-   }
-}
-
-
-static void synth_GETVF ( Int reg )
-{
-   emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg );
-   /* paranoia only; should be unnecessary ... */
-   /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */
-}
-
-
-static void synth_PUTVF ( UInt reg )
-{
-   emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP );
-}
-
-
-static void synth_handle_esp_assignment ( Int reg )
-{
-   emit_pushal();
-   emit_pushv_reg ( 4, reg );
-   synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-
-
-static void synth_fpu_mem_check_actions ( Bool isWrite, 
-                                          Int size, Int a_reg )
-{
-   Int helper_offw
-     = isWrite ? VGOFF_(fpu_write_check)
-               : VGOFF_(fpu_read_check);
-   emit_pushal();
-   emit_pushl_lit8 ( size );
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_add_lit_to_esp ( 8 );   
-   emit_popal();
-}
-
-
-#if 0
-/* FixMe.  Useful for debugging. */
-void VG_(oink) ( Int n )
-{
-   VG_(printf)("OiNk(%d): ", n );
-   VG_(show_reg_tags)( &VG_(m_shadow) );
-}
-
-static void synth_OINK ( Int n )
-{
-   emit_pushal();
-   emit_movv_lit_reg ( 4, n, R_EBP );
-   emit_pushl_reg ( R_EBP );
-   emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP );
-   emit_call_reg ( R_EBP );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-#endif
-
-static void synth_TAG1_op ( VgTagOp op, Int reg )
-{
-   switch (op) {
-
-      /* Scheme is
-            neg<sz> %reg          -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            or 0xFFFFFFFE, %reg   -- invalidate all bits except lowest
-      */
-      case VgT_PCast40:
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast20:
-         emit_unaryopv_reg(2, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast10:
-         if (reg >= 4) {
-            emit_swapl_reg_EAX(reg);
-            emit_unaryopb_reg(NEG, R_EAX);
-            emit_swapl_reg_EAX(reg);
-         } else {
-            emit_unaryopb_reg(NEG, reg);
-         }
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-
-      /* Scheme is
-            andl $1, %reg -- %reg is 0 or 1
-            negl %reg -- %reg is 0 or 0xFFFFFFFF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast04:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         break;
-      case VgT_PCast02:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast01:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* Scheme is
-            shl $24, %reg -- make irrelevant bits disappear
-            negl %reg             -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         break;
-      case VgT_PCast12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast11:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* We steal %ebp (a non-allocable reg) as a temporary:
-            pushl %ebp
-            movl %reg, %ebp
-            negl %ebp
-            orl %ebp, %reg
-            popl %ebp
-         This sequence turns out to be correct regardless of the 
-         operation width.
-      */
-      case VgT_Left4:
-      case VgT_Left2:
-      case VgT_Left1:
-         vg_assert(reg != R_EDI);
-         emit_movv_reg_reg(4, reg, R_EDI);
-         emit_unaryopv_reg(4, NEG, R_EDI);
-         emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg);
-         break;
-
-      /* These are all fairly obvious; do the op and then, if
-         necessary, invalidate unused bits. */
-      case VgT_SWiden14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         break;
-      case VgT_SWiden24:
-         emit_shiftopv_lit_reg(4, SHL, 16, reg);
-         emit_shiftopv_lit_reg(4, SAR, 16, reg);
-         break;
-      case VgT_SWiden12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_ZWiden14:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         break;
-      case VgT_ZWiden24:
-         emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg);
-         break;
-      case VgT_ZWiden12:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG1_op");
-   }
-}
-
-
-static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd )
-{
-   switch (op) {
-
-      /* UifU is implemented by OR, since 1 means Undefined. */
-      case VgT_UifU4:
-      case VgT_UifU2:
-      case VgT_UifU1:
-      case VgT_UifU0:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-
-      /* DifD is implemented by AND, since 0 means Defined. */
-      case VgT_DifD4:
-      case VgT_DifD2:
-      case VgT_DifD1:
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         break;
-
-      /* ImproveAND(value, tags) = value OR tags.
-	 Defined (0) value 0s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveAND4_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-      case VgT_ImproveAND2_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveAND1_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      /* ImproveOR(value, tags) = (not value) OR tags.
-	 Defined (0) value 1s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         To avoid trashing value, this is implemented (re de Morgan) as
-               not (value AND (not tags))
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveOR4_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         break;
-      case VgT_ImproveOR2_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveOR1_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG2_op");
-   }
-}
-
 /*----------------------------------------------------*/
 /*--- Generate code for a single UInstr.           ---*/
 /*----------------------------------------------------*/
@@ -2190,10 +1947,13 @@
    return (u->flags_w != FlagsEmpty); 
 }
 
-static void emitUInstr ( Int i, UInstr* u )
+static void emitUInstr ( UCodeBlock* cb, Int i, RRegSet regs_live_before )
 {
+   Int     old_emitted_code_used;
+   UInstr* u = &cb->instrs[i];
+
    if (dis)
-      VG_(ppUInstr)(i, u);
+      VG_(ppUInstrWithRegs)(i, u);
 
 #  if 0
    if (0&& VG_(translations_done) >= 600) {
@@ -2204,13 +1964,79 @@
    }
 #  endif
 
+   old_emitted_code_used = emitted_code_used;
+   
    switch (u->opcode) {
-
       case NOP: case CALLM_S: case CALLM_E: break;
 
       case INCEIP: {
-         vg_assert(u->tag1 == Lit16);
-         emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+        /* Note: Redundant INCEIP merging.  A potentially useful
+           performance enhancement, but currently disabled.  Reason
+           is that it needs a surefire way to know if a UInstr might
+           give rise to a stack snapshot being taken.  The logic below
+           is correct (hopefully ...) for the core UInstrs, but is
+           incorrect if a skin has its own UInstrs, since the logic
+           currently assumes that none of them can cause a stack
+           trace, and that's just wrong.  Note this isn't
+           mission-critical -- the system still functions -- but will
+           cause incorrect source locations in some situations,
+           specifically for the memcheck skin.  This is known to
+           confuse programmers, understandably.  */
+#        if 0
+         Bool    can_skip;
+         Int     j;
+
+         /* Scan forwards to see if this INCEIP dominates (in the
+            technical sense) a later one, AND there are no CCALLs in
+            between.  If so, skip this one and instead add its count
+            with the later one. */
+         can_skip = True;
+	 j = i+1;
+         while (True) {
+            if (cb->instrs[j].opcode == CCALL 
+                || cb->instrs[j].opcode == CALLM) {
+               /* CCALL -- we can't skip this INCEIP. */
+               can_skip = False; 
+               break;
+            }
+            if (cb->instrs[j].opcode == INCEIP) {
+               /* Another INCEIP.  Check that the sum will fit. */
+               if (cb->instrs[i].val1 + cb->instrs[j].val1 > 127)
+                  can_skip = False;
+               break;
+            }
+            if (cb->instrs[j].opcode == JMP || cb->instrs[j].opcode == JIFZ) {
+               /* Execution is not guaranteed to get beyond this
+                  point.  Give up. */
+               can_skip = False; 
+               break;
+            }
+            j++;
+            /* Assertion should hold because all blocks should end in an
+               unconditional JMP, so the above test should get us out of
+               the loop at the end of a block. */
+            vg_assert(j < cb->used);
+         }
+         if (can_skip) {
+            /* yay!  Accumulate the delta into the next INCEIP. */
+            // VG_(printf)("skip INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(j > i);
+            vg_assert(j < cb->used);
+            vg_assert(cb->instrs[j].opcode == INCEIP);
+            vg_assert(cb->instrs[i].opcode == INCEIP);
+            vg_assert(cb->instrs[j].tag1 == Lit16);
+            vg_assert(cb->instrs[i].tag1 == Lit16);
+            cb->instrs[j].val1 += cb->instrs[i].val1;
+            /* do nothing now */
+         } else 
+#        endif
+
+         {
+            /* no, we really have to do this, alas */
+            // VG_(printf)("  do INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(u->tag1 == Lit16);
+            emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+         }
          break;
       }
 
@@ -2240,41 +2066,10 @@
          break;
       }
 
-      case SETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         synth_SETV ( u->size, u->val1 );
-         break;
-      }
-
-      case STOREV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == RealReg);
-         synth_STOREV ( u->size, u->tag1, 
-                                 u->tag1==Literal ? u->lit32 : u->val1, 
-                                 u->val2 );
-         break;
-      }
-
       case STORE: {
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
-	 /* No longer possible, but retained for illustrative purposes.
-         if (u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-	 */
-         break;
-      }
-
-      case LOADV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         if (0 && VG_(clo_instrument))
-            emit_AMD_prefetch_reg ( u->val1 );
-         synth_LOADV ( u->size, u->val1, u->val2 );
          break;
       }
 
@@ -2285,47 +2080,6 @@
          break;
       }
 
-      case TESTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg);
-         synth_TESTV(u->size, u->tag1, u->val1);
-         break;
-      }
-
-      case GETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == ArchReg);
-         vg_assert(u->tag2 == RealReg);
-         synth_GETV(u->size, u->val1, u->val2);
-         break;
-      }
-
-      case GETVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_GETVF(u->val1);
-         break;
-      }
-
-      case PUTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == ArchReg);
-         synth_PUTV(u->size, u->tag1, 
-                             u->tag1==Literal ? u->lit32 : u->val1, 
-                             u->val2 );
-         break;
-      }
-
-      case PUTVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_PUTVF(u->val1);
-         break;
-      }
-
       case GET: {
          vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo);
          vg_assert(u->tag2 == RealReg);
@@ -2344,15 +2098,23 @@
          if (u->tag2 == ArchReg 
              && u->val2 == R_ESP
              && u->size == 4
-             && VG_(clo_instrument)) {
-            synth_handle_esp_assignment ( u->val1 );
+             && (VG_(track_events).new_mem_stack         || 
+                 VG_(track_events).new_mem_stack_aligned ||
+                 VG_(track_events).die_mem_stack         ||
+                 VG_(track_events).die_mem_stack_aligned ||
+                 VG_(track_events).post_mem_write))
+         {
+            synth_handle_esp_assignment ( i, u->val1, regs_live_before,
+                                          u->regs_live_after );
 	 }
-         synth_mov_reg_offregmem ( 
-            u->size, 
-            u->val1, 
-            spillOrArchOffset( u->size, u->tag2, u->val2 ),
-            R_EBP
-         );
+         else {
+            synth_mov_reg_offregmem ( 
+               u->size, 
+               u->val1, 
+               spillOrArchOffset( u->size, u->tag2, u->val2 ),
+               R_EBP
+            );
+         }
          break;
       }
 
@@ -2436,7 +2198,6 @@
       case RCR:
       case RCL:
          vg_assert(u->tag2 == RealReg);
-         vg_assert(! readFlagUse ( u ));
          switch (u->tag1) {
             case Literal: synth_shiftop_lit_reg (
                              readFlagUse(u), writeFlagUse(u),
@@ -2515,55 +2276,16 @@
          synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 );
          break;
 
-      case TAG1:
-         synth_TAG1_op ( u->val3, u->val1 );
-         break;
-
-      case TAG2:
-         if (u->val3 != VgT_DebugFn) {
-            synth_TAG2_op ( u->val3, u->val1, u->val2 );
-         } else {
-            /* Assume a call to VgT_DebugFn passing both args
-               and placing the result back in the second. */
-            Int j, k;
-            /* u->val2 is the reg into which the result is written.  So
-               don't save/restore it.  And it can be used at a temp for
-               the call target, too.  Since %eax is used for the return
-               value from the C procedure, it is preserved only by
-               virtue of not being mentioned as a VG_CALLEE_SAVED reg. */
-            for (k = 0; k < VG_MAX_REALREGS; k++) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_pushv_reg ( 4, j );
-            }
-            emit_pushv_reg(4, u->val2);
-            emit_pushv_reg(4, u->val1);
-            emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 );
-            emit_call_reg ( u->val2 );
-            if (u->val2 != R_EAX)
-               emit_movv_reg_reg ( 4, R_EAX, u->val2 );
-            /* nuke args */
-            emit_add_lit_to_esp(8);
-            for (k = VG_MAX_REALREGS-1; k >= 0; k--) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_popv_reg ( 4, j );
-            }
-         }
-         break;
-
       case PUSH:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_pushv_reg ( 4, u->val1 );
+         VG_(emit_pushv_reg) ( 4, u->val1 );
          break;
 
       case POP:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_popv_reg ( 4, u->val1 );
+         VG_(emit_popv_reg) ( 4, u->val1 );
          break;
 
       case CALLM:
@@ -2572,35 +2294,34 @@
          vg_assert(u->size == 0);
          if (readFlagUse ( u )) 
             emit_get_eflags();
-         synth_call_baseBlock_method ( False, u->val1 );
+         VG_(synth_call) ( False, u->val1 );
          if (writeFlagUse ( u )) 
             emit_put_eflags();
          break;
 
-      case CCALL_1_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == NoValue);
+      case CCALL: {
+         /* Lazy: copy all three vals;  synth_ccall ignores any unnecessary
+            ones. */
+         UInt argv[]  = { u->val1, u->val2, u->val3 };
+         UInt tagv[]  = { RealReg, RealReg, RealReg };
+         UInt ret_reg = ( u->has_ret_val ? u->val3 : INVALID_REALREG );
+
+         if (u->argc >= 1)                   vg_assert(u->tag1 == RealReg);
+         else                                vg_assert(u->tag1 == NoValue);
+         if (u->argc >= 2)                   vg_assert(u->tag2 == RealReg);
+         else                                vg_assert(u->tag2 == NoValue);
+         if (u->argc == 3 || u->has_ret_val) vg_assert(u->tag3 == RealReg);
+         else                                vg_assert(u->tag3 == NoValue);
          vg_assert(u->size == 0);
 
-         synth_ccall_saveRegs();
-         synth_ccall_pushOneArg ( u->val1 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 4 );
+         VG_(synth_ccall) ( u->lit32, u->argc, u->regparms_n, argv, tagv,
+                            ret_reg, regs_live_before, u->regs_live_after );
          break;
-
-      case CCALL_2_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         vg_assert(u->size == 0);
-
-         synth_ccall_saveRegs();
-         synth_ccall_pushTwoArgs ( u->val1, u->val2 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 8 );
-         break;
-
+      }
       case CLEAR:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == NoValue);
-         emit_add_lit_to_esp ( u->val1 );
+         VG_(emit_add_lit_to_esp) ( u->val1 );
          break;
 
       case CC2VAL:
@@ -2610,23 +2331,13 @@
          synth_setb_reg ( u->val1, u->cond );
          break;
 
-      /* We assume that writes to memory done by FPU_Ws are not going
-         to be used to create new code, so there's no orig-code-write
-         checks done by default. */
       case FPU_R: 
       case FPU_W:         
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == RealReg);
-         if (VG_(clo_instrument))
-            synth_fpu_mem_check_actions ( 
-               u->opcode==FPU_W, u->size, u->val2 );
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
-         /* No longer possible, but retained for illustrative purposes.
-         if (u->opcode == FPU_W && u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-         */
          break;
 
       case FPU:
@@ -2641,11 +2352,22 @@
          break;
 
       default: 
-         VG_(printf)("emitUInstr: unhandled insn:\n");
-         VG_(ppUInstr)(0,u);
-         VG_(panic)("emitUInstr: unimplemented opcode");
+         if (VG_(needs).extended_UCode)
+            SK_(emitExtUInstr)(u, regs_live_before);
+         else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled opcode: %u.  Perhaps "
+                        " VG_(needs).extended_UCode should be set?\n",
+                        u->opcode);
+            VG_(ppUInstr)(0,u);
+            VG_(panic)("emitUInstr: unimplemented opcode");
+         }
    }
 
+   /* Update UInstr histogram */
+   vg_assert(u->opcode < 100);
+   histogram[u->opcode].counts++;
+   histogram[u->opcode].size += (emitted_code_used - old_emitted_code_used);
 }
 
 
@@ -2654,67 +2376,39 @@
 UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes )
 {
    Int i;
+   UChar regs_live_before = 0;   /* No regs live at BB start */
+   
    emitted_code_used = 0;
    emitted_code_size = 500; /* reasonable initial size */
-   emitted_code = VG_(jitmalloc)(emitted_code_size);
+   emitted_code = VG_(arena_malloc)(VG_AR_JITTER, emitted_code_size);
 
-   if (dis) VG_(printf)("Generated code:\n");
+   if (dis) VG_(printf)("Generated x86 code:\n");
 
    for (i = 0; i < cb->used; i++) {
+      UInstr* u = &cb->instrs[i];
       if (cb->instrs[i].opcode != NOP) {
-         UInstr* u = &cb->instrs[i];
-#        if 1
+
          /* Check on the sanity of this insn. */
-         Bool sane = VG_(saneUInstr)( False, u );
+         Bool sane = VG_(saneUInstr)( False, False, u );
          if (!sane) {
             VG_(printf)("\ninsane instruction\n");
-            VG_(ppUInstr)( i, u );
+            VG_(upUInstr)( i, u );
 	 }
          vg_assert(sane);
-#        endif
-#        if 0
-         /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking.
-            Requires a suitable definition of vg_DebugFn. */
-	 if (u->opcode == TAG1) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            emitUInstr( i, &t1 );
-	 }
-	 if (u->opcode == TAG2) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            vg_assert(u->tag2 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 
-                || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 
-                || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4)
-               emitUInstr( i, &t1 );
-            t1.val1 = t1.val2 = u->val2;
-            emitUInstr( i, &t1 );
-	 }
-#        endif
-         emitUInstr( i, u );
+         emitUInstr( cb, i, regs_live_before );
       }
+      regs_live_before = u->regs_live_after;
    }
+   if (dis) VG_(printf)("\n");
 
    /* Returns a pointer to the emitted code.  This will have to be
-      copied by the caller into the translation cache, and then freed
-      using VG_(jitfree). */
+      copied by the caller into the translation cache, and then freed */
    *nbytes = emitted_code_used;
    return emitted_code;
 }
 
+#undef dis
+
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_from_ucode.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 8262737..2315da4 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -86,36 +86,6 @@
    and the incoming parameters can be modified, to return results.
 */
 
-
-.global VG_(helper_value_check0_fail)
-VG_(helper_value_check0_fail):
-	pushal
-	call	VG_(helperc_value_check0_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check1_fail)
-VG_(helper_value_check1_fail):
-	pushal
-	call	VG_(helperc_value_check1_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check2_fail)
-VG_(helper_value_check2_fail):
-	pushal
-	call	VG_(helperc_value_check2_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check4_fail)
-VG_(helper_value_check4_fail):
-	pushal
-	call	VG_(helperc_value_check4_fail)
-	popal
-	ret
-
-
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 74e1016..edf7aef 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- A header file for all parts of Valgrind.                     ---*/
+/*--- A header file for all private parts of Valgrind's core.      ---*/
 /*--- Include no other!                                            ---*/
 /*---                                                 vg_include.h ---*/
 /*--------------------------------------------------------------------*/
@@ -27,17 +27,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_INCLUDE_H
 #define __VG_INCLUDE_H
 
-
-#include <stdarg.h>       /* ANSI varargs stuff  */
-#include <setjmp.h>       /* for jmp_buf         */
-
-
 /* ---------------------------------------------------------------------
    Where to send bug reports to.
    ------------------------------------------------------------------ */
@@ -52,21 +47,9 @@
 
 #include "vg_constants.h"
 
-
-/* Set to 1 to enable time profiling.  Since this uses SIGPROF, we
-   don't want this permanently enabled -- only for profiling
-   builds. */
-#if 0
-#  define VG_PROFILE
-#endif
-
-
-/* Total number of integer registers available for allocation.  That's
-   all of them except %esp, %edi and %ebp.  %edi is a general spare
-   temporary.  %ebp permanently points at VG_(baseBlock).  Note that
-   it's important that this tie in with what rankToRealRegNo() says.
-   DO NOT CHANGE THIS VALUE FROM 5. !  */
-#define VG_MAX_REALREGS 5
+/* All stuff visible to core and skins goes in vg_skin.h.  Things visible
+ * to core but private to skins go here. */
+#include "vg_skin.h"
 
 /* Total number of spill slots available for allocation, if a TempReg
    doesn't make it into a RealReg.  Just bomb the entire system if
@@ -111,10 +94,6 @@
    errors at all.  Counterpart to M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN. */
 #define M_VG_COLLECT_NO_ERRORS_AFTER_FOUND 30000
 
-/* These many bytes below %ESP are considered addressible if we're
-   doing the --workaround-gcc296-bugs hack. */
-#define VG_GCC296_BUG_STACK_SLOP 1024
-
 /* The maximum number of calls we're prepared to save in a
    backtrace. */
 #define VG_DEEPEST_BACKTRACE 50
@@ -132,17 +111,6 @@
    give finer interleaving but much increased scheduling overheads. */
 #define VG_SCHEDULING_QUANTUM   50000
 
-/* The maximum number of pthreads that we support.  This is
-   deliberately not very high since our implementation of some of the
-   scheduler algorithms is surely O(N) in the number of threads, since
-   that's simple, at least.  And (in practice) we hope that most
-   programs do not need many threads. */
-#define VG_N_THREADS 50
-
-/* Maximum number of pthread keys available.  Again, we start low until
-   the need for a higher number presents itself. */
-#define VG_N_THREAD_KEYS 50
-
 /* Number of file descriptors that can simultaneously be waited on for
    I/O to complete.  Perhaps this should be the same as VG_N_THREADS
    (surely a thread can't wait on more than one fd at once?.  Who
@@ -165,97 +133,43 @@
 /* Number of entries in each thread's fork-handler stack. */
 #define VG_N_FORKHANDLERSTACK 2
 
+/* Max number of callers for context in a suppression. */
+#define VG_N_SUPP_CALLERS  4
+   
 
 /* ---------------------------------------------------------------------
    Basic types
    ------------------------------------------------------------------ */
 
-typedef unsigned char          UChar;
-typedef unsigned short         UShort;
-typedef unsigned int           UInt;
-typedef unsigned long long int ULong;
-
-typedef signed char          Char;
-typedef signed short         Short;
-typedef signed int           Int;
-typedef signed long long int Long;
-
-typedef unsigned int Addr;
-
-typedef unsigned char Bool;
-#define False ((Bool)0)
-#define True ((Bool)1)
-
-#define mycat_wrk(aaa,bbb) aaa##bbb
-#define mycat(aaa,bbb) mycat_wrk(aaa,bbb)
-
 /* Just pray that gcc's constant folding works properly ... */
 #define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0)               \
    ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4)  \
      | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0))
 
-/* For cache simulation */
-typedef struct { 
-    int size;       /* bytes */
-    int assoc;
-    int line_size;  /* bytes */
-} cache_t;
-
-#define UNDEFINED_CACHE     ((cache_t) { -1, -1, -1 })
-
-/* ---------------------------------------------------------------------
-   Now the basic types are set up, we can haul in the kernel-interface
-   definitions.
-   ------------------------------------------------------------------ */
-
-#include "./vg_kerneliface.h"
-
-
 /* ---------------------------------------------------------------------
    Command-line-settable options
    ------------------------------------------------------------------ */
 
-#define VG_CLO_SMC_NONE 0
-#define VG_CLO_SMC_SOME 1
-#define VG_CLO_SMC_ALL  2
-
 #define VG_CLO_MAX_SFILES 10
 
 /* Should we stop collecting errors if too many appear?  default: YES */
 extern Bool  VG_(clo_error_limit);
-/* Shall we V-check addrs (they are always A checked too): default: YES */
-extern Bool  VG_(clo_check_addrVs);
 /* Enquire about whether to attach to GDB at errors?   default: NO */
 extern Bool  VG_(clo_GDB_attach);
 /* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */
 extern Int   VG_(sanity_level);
-/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */
-extern Int   VG_(clo_verbosity);
 /* Automatically attempt to demangle C++ names?  default: YES */
 extern Bool  VG_(clo_demangle);
-/* Do leak check at exit?  default: NO */
-extern Bool  VG_(clo_leak_check);
-/* In leak check, show reachable-but-not-freed blocks?  default: NO */
-extern Bool  VG_(clo_show_reachable);
-/* How closely should we compare ExeContexts in leak records? default: 2 */
-extern Int   VG_(clo_leak_resolution);
 /* Round malloc sizes upwards to integral number of words? default:
    NO */
 extern Bool  VG_(clo_sloppy_malloc);
 /* Minimum alignment in functions that don't specify alignment explicitly.
    default: 0, i.e. use default of the machine (== 4) */
 extern Int   VG_(clo_alignment);
-/* Allow loads from partially-valid addresses?  default: YES */
-extern Bool  VG_(clo_partial_loads_ok);
 /* Simulate child processes? default: NO */
 extern Bool  VG_(clo_trace_children);
 /* The file id on which we send all messages.  default: 2 (stderr). */
 extern Int   VG_(clo_logfile_fd);
-/* Max volume of the freed blocks queue. */
-extern Int   VG_(clo_freelist_vol);
-/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
-   default: NO */
-extern Bool  VG_(clo_workaround_gcc296_bugs);
 
 /* The number of suppression files specified. */
 extern Int   VG_(clo_n_suppressions);
@@ -266,20 +180,8 @@
 extern Bool  VG_(clo_single_step);
 /* Code improvement?  default: YES */
 extern Bool  VG_(clo_optimise);
-/* Memory-check instrumentation?  default: YES */
-extern Bool  VG_(clo_instrument);
-/* DEBUG: clean up instrumented code?  default: YES */
-extern Bool  VG_(clo_cleanup);
-/* Cache simulation instrumentation?  default: NO */
-extern Bool  VG_(clo_cachesim);
-/* I1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_I1_cache);
-/* D1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_D1_cache);
-/* L2 cache configuration.  default: undefined */
-extern cache_t VG_(clo_L2_cache);
-/* SMC write checks?  default: SOME (1,2,4 byte movs to mem) */
-extern Int   VG_(clo_smc_check);
+/* DEBUG: print generated code?  default: 00000 ( == NO ) */
+extern Bool  VG_(clo_trace_codegen);
 /* DEBUG: print system calls?  default: NO */
 extern Bool  VG_(clo_trace_syscalls);
 /* DEBUG: print signal details?  default: NO */
@@ -308,78 +210,35 @@
    Debugging and profiling stuff
    ------------------------------------------------------------------ */
 
+/* Change to 1 to get more accurate but more expensive core profiling. */
+#if 0
+#  define VGP_ACCURATE_PROFILING
+#endif
+
 /* No, really.  I _am_ that strange. */
 #define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn)
 
-/* Tools for building messages from multiple parts. */
-typedef
-   enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg }
-   VgMsgKind;
-
-extern void VG_(start_msg)  ( VgMsgKind kind );
-extern void VG_(add_to_msg) ( Char* format, ... );
-extern void VG_(end_msg)    ( void );
-
-/* Send a simple, single-part message. */
-extern void VG_(message)    ( VgMsgKind kind, Char* format, ... );
-
 /* Create a logfile into which messages can be dumped. */
 extern void VG_(startup_logging) ( void );
-extern void VG_(shutdown_logging) ( void );
-
-
-/* Profiling stuff */
-#ifdef VG_PROFILE
-
-#define VGP_M_STACK 10
-
-#define VGP_M_CCS 26  /* == the # of elems in VGP_LIST */
-#define VGP_LIST \
-   VGP_PAIR(VgpUnc=0,      "unclassified"),           \
-   VGP_PAIR(VgpRun,        "running"),                \
-   VGP_PAIR(VgpSched,      "scheduler"),              \
-   VGP_PAIR(VgpMalloc,     "low-lev malloc/free"),    \
-   VGP_PAIR(VgpCliMalloc,  "client  malloc/free"),    \
-   VGP_PAIR(VgpTranslate,  "translate-main"),         \
-   VGP_PAIR(VgpToUCode,    "to-ucode"),               \
-   VGP_PAIR(VgpFromUcode,  "from-ucode"),             \
-   VGP_PAIR(VgpImprove,    "improve"),                \
-   VGP_PAIR(VgpInstrument, "instrument"),             \
-   VGP_PAIR(VgpCleanup,    "cleanup"),                \
-   VGP_PAIR(VgpRegAlloc,   "reg-alloc"),              \
-   VGP_PAIR(VgpDoLRU,      "do-lru"),                 \
-   VGP_PAIR(VgpSlowFindT,  "slow-search-transtab"),   \
-   VGP_PAIR(VgpInitAudit,  "init-mem-audit"),         \
-   VGP_PAIR(VgpExeContext, "exe-context"),            \
-   VGP_PAIR(VgpReadSyms,   "read-syms"),              \
-   VGP_PAIR(VgpAddToT,     "add-to-transtab"),        \
-   VGP_PAIR(VgpSARP,       "set-addr-range-perms"),   \
-   VGP_PAIR(VgpSyscall,    "syscall wrapper"),        \
-   VGP_PAIR(VgpCacheInstrument, "cache instrument"),  \
-   VGP_PAIR(VgpCacheGetBBCC,"cache get BBCC"),        \
-   VGP_PAIR(VgpCacheSimulate, "cache simulate"),      \
-   VGP_PAIR(VgpCacheDump,  "cache stats dump"),       \
-   VGP_PAIR(VgpSpare1,     "spare 1"),                \
-   VGP_PAIR(VgpSpare2,     "spare 2")
-
-#define VGP_PAIR(enumname,str) enumname
-typedef enum { VGP_LIST } VgpCC;
-#undef VGP_PAIR
+extern void VG_(shutdown_logging)( void );
 
 extern void VGP_(init_profiling) ( void );
 extern void VGP_(done_profiling) ( void );
-extern void VGP_(pushcc) ( VgpCC );
-extern void VGP_(popcc) ( void );
 
-#define VGP_PUSHCC(cc) VGP_(pushcc)(cc)
-#define VGP_POPCC      VGP_(popcc)()
+#undef  VGP_PUSHCC
+#undef  VGP_POPCC
+#define VGP_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#define VGP_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 
+/* Use this for ones that happen a lot and thus we don't want to put in
+   all the time, eg. for %esp assignment. */
+#ifdef VGP_ACCURATE_PROFILING
+#  define VGP_MAYBE_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#  define VGP_MAYBE_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 #else
-
-#define VGP_PUSHCC(cc) /* */
-#define VGP_POPCC      /* */
-
-#endif /* VG_PROFILE */
+#  define VGP_MAYBE_PUSHCC(x)
+#  define VGP_MAYBE_POPCC(x)
+#endif
 
 
 /* ---------------------------------------------------------------------
@@ -387,37 +246,40 @@
    ------------------------------------------------------------------ */
 
 /* Allocation arenas.  
+      CORE      is for the core's general use.
+      SKIN      is for the skin to use (and the only one it uses).
       SYMTAB    is for Valgrind's symbol table storage.
+      JITTER    is for small storage during translation.
       CLIENT    is for the client's mallocs/frees.
       DEMANGLE  is for the C++ demangler.
       EXECTXT   is for storing ExeContexts.
-      ERRCTXT   is for storing ErrContexts.
-      PRIVATE   is for Valgrind general stuff.
+      ERRORS    is for storing CoreErrors.
       TRANSIENT is for very short-term use.  It should be empty
                 in between uses.
-   When adding a new arena, remember also to add it
-   to ensure_mm_init(). 
+   When adding a new arena, remember also to add it to ensure_mm_init(). 
 */
 typedef Int ArenaId;
 
-#define VG_N_ARENAS 7
+#define VG_N_ARENAS 9
 
-#define VG_AR_PRIVATE   0    /* :: ArenaId */
-#define VG_AR_SYMTAB    1    /* :: ArenaId */
-#define VG_AR_CLIENT    2    /* :: ArenaId */
-#define VG_AR_DEMANGLE  3    /* :: ArenaId */
-#define VG_AR_EXECTXT   4    /* :: ArenaId */
-#define VG_AR_ERRCTXT   5    /* :: ArenaId */
-#define VG_AR_TRANSIENT 6    /* :: ArenaId */
+#define VG_AR_CORE      0    /* :: ArenaId */
+#define VG_AR_SKIN      1    /* :: ArenaId */
+#define VG_AR_SYMTAB    2    /* :: ArenaId */
+#define VG_AR_JITTER    3    /* :: ArenaId */
+#define VG_AR_CLIENT    4    /* :: ArenaId */
+#define VG_AR_DEMANGLE  5    /* :: ArenaId */
+#define VG_AR_EXECTXT   6    /* :: ArenaId */
+#define VG_AR_ERRORS    7    /* :: ArenaId */
+#define VG_AR_TRANSIENT 8    /* :: ArenaId */
 
-extern void* VG_(malloc)  ( ArenaId arena, Int nbytes );
-extern void  VG_(free)    ( ArenaId arena, void* ptr );
-extern void* VG_(calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
-extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size );
-extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, 
+extern void* VG_(arena_malloc)  ( ArenaId arena, Int nbytes );
+extern void  VG_(arena_free)    ( ArenaId arena, void* ptr );
+extern void* VG_(arena_calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
+extern void* VG_(arena_realloc) ( ArenaId arena, void* ptr, Int alignment,
+                                  Int size );
+extern void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, 
                                                 Int req_pszB );
 
-extern void  VG_(mallocSanityCheckArena) ( ArenaId arena );
 extern void  VG_(mallocSanityCheckAll)   ( void );
 
 extern void  VG_(show_all_arena_stats) ( void );
@@ -433,13 +295,13 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_clientfuns.c
+   Exports of vg_clientfuncs.c
    ------------------------------------------------------------------ */
 
 /* This doesn't export code or data that valgrind.so needs to link
    against.  However, the scheduler does need to know the following
    request codes.  A few, publically-visible, request codes are also
-   defined in valgrind.h. */
+   defined in valgrind.h, and similar headers for some skins. */
 
 #define VG_USERREQ__MALLOC              0x2001
 #define VG_USERREQ__BUILTIN_NEW         0x2002
@@ -552,16 +414,6 @@
    Exports of vg_scheduler.c
    ------------------------------------------------------------------ */
 
-/* ThreadIds are simply indices into the vg_threads[] array. */
-typedef 
-   UInt 
-   ThreadId;
-
-/* Special magic value for an invalid ThreadId.  It corresponds to
-   LinuxThreads using zero as the initial value for
-   pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */
-#define VG_INVALID_THREADID ((ThreadId)(0))
-
 typedef
    enum { 
       VgTs_Empty,      /* this slot is not in use */
@@ -594,140 +446,138 @@
    ForkHandlerEntry;
 
 
-typedef
-   struct {
-      /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
-         The thread identity is simply the index in vg_threads[].
-         ThreadId == 1 is the root thread and has the special property
-         that we don't try and allocate or deallocate its stack.  For
-         convenience of generating error message, we also put the
-         ThreadId in this tid field, but be aware that it should
-         ALWAYS == the index in vg_threads[]. */
-      ThreadId tid;
+struct _ThreadState {
+   /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
+      The thread identity is simply the index in vg_threads[].
+      ThreadId == 1 is the root thread and has the special property
+      that we don't try and allocate or deallocate its stack.  For
+      convenience of generating error message, we also put the
+      ThreadId in this tid field, but be aware that it should
+      ALWAYS == the index in vg_threads[]. */
+   ThreadId tid;
 
-      /* Current scheduling status. 
+   /* Current scheduling status. 
 
-         Complications: whenever this is set to VgTs_WaitMX, you
-         should also set .m_edx to whatever the required return value
-         is for pthread_mutex_lock / pthread_cond_timedwait for when
-         the mutex finally gets unblocked. */
-      ThreadStatus status;
+      Complications: whenever this is set to VgTs_WaitMX, you
+      should also set .m_edx to whatever the required return value
+      is for pthread_mutex_lock / pthread_cond_timedwait for when
+      the mutex finally gets unblocked. */
+   ThreadStatus status;
 
-      /* When .status == WaitMX, points to the mutex I am waiting for.
-         When .status == WaitCV, points to the mutex associated with
-         the condition variable indicated by the .associated_cv field.
-         In all other cases, should be NULL. */
-      void* /* pthread_mutex_t* */ associated_mx;
+   /* When .status == WaitMX, points to the mutex I am waiting for.
+      When .status == WaitCV, points to the mutex associated with
+      the condition variable indicated by the .associated_cv field.
+      In all other cases, should be NULL. */
+   void* /*pthread_mutex_t* */ associated_mx;
 
-      /* When .status == WaitCV, points to the condition variable I am
-         waiting for.  In all other cases, should be NULL. */
-      void* /* pthread_cond_t* */ associated_cv;
+   /* When .status == WaitCV, points to the condition variable I am
+      waiting for.  In all other cases, should be NULL. */
+   void* /*pthread_cond_t* */ associated_cv;
 
-      /* If VgTs_Sleeping, this is when we should wake up, measured in
-         milliseconds as supplied by VG_(read_millisecond_counter). 
- 
-         If VgTs_WaitCV, this indicates the time at which
-         pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
-         this means infinitely far in the future, viz,
-         pthread_cond_wait. */
-      UInt awaken_at;
+   /* If VgTs_Sleeping, this is when we should wake up, measured in
+      milliseconds as supplied by VG_(read_millisecond_counter). 
 
-      /* If VgTs_WaitJoiner, return value, as generated by joinees. */
-      void* joinee_retval;
+      If VgTs_WaitCV, this indicates the time at which
+      pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
+      this means infinitely far in the future, viz,
+      pthread_cond_wait. */
+   UInt awaken_at;
 
-      /* If VgTs_WaitJoinee, place to copy the return value to, and
-         the identity of the thread we're waiting for. */
-      void**   joiner_thread_return;
-      ThreadId joiner_jee_tid;      
+   /* If VgTs_WaitJoiner, return value, as generated by joinees. */
+   void* joinee_retval;
 
-      /* Whether or not detached. */
-      Bool detached;
+   /* If VgTs_WaitJoinee, place to copy the return value to, and
+      the identity of the thread we're waiting for. */
+   void**   joiner_thread_return;
+   ThreadId joiner_jee_tid;      
 
-      /* Cancelability state and type. */
-      Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
-      Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
-     
-      /* Pointer to fn to call to do cancellation.  Indicates whether
-         or not cancellation is pending.  If NULL, not pending.  Else
-         should be &thread_exit_wrapper(), indicating that
-         cancallation is pending. */
-      void (*cancel_pend)(void*);
+   /* Whether or not detached. */
+   Bool detached;
 
-      /* The cleanup stack. */
-      Int          custack_used;
-      CleanupEntry custack[VG_N_CLEANUPSTACK];
+   /* Cancelability state and type. */
+   Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
+   Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
+  
+   /* Pointer to fn to call to do cancellation.  Indicates whether
+      or not cancellation is pending.  If NULL, not pending.  Else
+      should be &thread_exit_wrapper(), indicating that
+      cancallation is pending. */
+   void (*cancel_pend)(void*);
 
-      /* thread-specific data */
-      void* specifics[VG_N_THREAD_KEYS];
+   /* The cleanup stack. */
+   Int          custack_used;
+   CleanupEntry custack[VG_N_CLEANUPSTACK];
 
-      /* This thread's blocked-signals mask.  Semantics is that for a
-         signal to be delivered to this thread, the signal must not be
-         blocked by either the process-wide signal mask nor by this
-         one.  So, if this thread is prepared to handle any signal that
-         the process as a whole is prepared to handle, this mask should
-         be made empty -- and that it is its default, starting
-         state. */
-      vki_ksigset_t sig_mask;
+   /* thread-specific data */
+   void* specifics[VG_N_THREAD_KEYS];
 
-      /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
-         is the set of signals for which we are sigwait()ing. */
-      vki_ksigset_t sigs_waited_for;
+   /* This thread's blocked-signals mask.  Semantics is that for a
+      signal to be delivered to this thread, the signal must not be
+      blocked by either the process-wide signal mask nor by this
+      one.  So, if this thread is prepared to handle any signal that
+      the process as a whole is prepared to handle, this mask should
+      be made empty -- and that it is its default, starting
+      state. */
+   vki_ksigset_t sig_mask;
 
-      /* Counts the number of times a signal handler for this thread
-         has returned.  This makes it easy to implement pause(), by
-         polling this value, of course interspersed with nanosleeps,
-         and waiting till it changes. */
-      UInt n_signals_returned;
+   /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
+      is the set of signals for which we are sigwait()ing. */
+   vki_ksigset_t sigs_waited_for;
 
-      /* Stacks.  When a thread slot is freed, we don't deallocate its
-         stack; we just leave it lying around for the next use of the
-         slot.  If the next use of the slot requires a larger stack,
-         only then is the old one deallocated and a new one
-         allocated. 
- 
-         For the main thread (threadid == 0), this mechanism doesn't
-         apply.  We don't know the size of the stack since we didn't
-         allocate it, and furthermore we never reallocate it. */
+   /* Counts the number of times a signal handler for this thread
+      has returned.  This makes it easy to implement pause(), by
+      polling this value, of course interspersed with nanosleeps,
+      and waiting till it changes. */
+   UInt n_signals_returned;
 
-      /* The allocated size of this thread's stack (permanently zero
-         if this is ThreadId == 0, since we didn't allocate its stack) */
-      UInt stack_size;
+   /* Stacks.  When a thread slot is freed, we don't deallocate its
+      stack; we just leave it lying around for the next use of the
+      slot.  If the next use of the slot requires a larger stack,
+      only then is the old one deallocated and a new one
+      allocated. 
 
-      /* Address of the lowest word in this thread's stack.  NULL means
-         not allocated yet.
-      */
-      Addr stack_base;
+      For the main thread (threadid == 0), this mechanism doesn't
+      apply.  We don't know the size of the stack since we didn't
+      allocate it, and furthermore we never reallocate it. */
 
-     /* Address of the highest legitimate word in this stack.  This is
-        used for error messages only -- not critical for execution
-        correctness.  Is is set for all stacks, specifically including
-        ThreadId == 0 (the main thread). */
-      Addr stack_highest_word;
+   /* The allocated size of this thread's stack (permanently zero
+      if this is ThreadId == 0, since we didn't allocate its stack) */
+   UInt stack_size;
 
-      /* Saved machine context. */
-      UInt m_eax;
-      UInt m_ebx;
-      UInt m_ecx;
-      UInt m_edx;
-      UInt m_esi;
-      UInt m_edi;
-      UInt m_ebp;
-      UInt m_esp;
-      UInt m_eflags;
-      UInt m_eip;
-      UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+   /* Address of the lowest word in this thread's stack.  NULL means
+      not allocated yet.
+   */
+   Addr stack_base;
 
-      UInt sh_eax;
-      UInt sh_ebx;
-      UInt sh_ecx;
-      UInt sh_edx;
-      UInt sh_esi;
-      UInt sh_edi;
-      UInt sh_ebp;
-      UInt sh_esp;
-      UInt sh_eflags;
-   }
-   ThreadState;
+  /* Address of the highest legitimate word in this stack.  This is
+     used for error messages only -- not critical for execution
+     correctness.  Is is set for all stacks, specifically including
+     ThreadId == 0 (the main thread). */
+   Addr stack_highest_word;
+
+   /* Saved machine context. */
+   UInt m_eax;
+   UInt m_ebx;
+   UInt m_ecx;
+   UInt m_edx;
+   UInt m_esi;
+   UInt m_edi;
+   UInt m_ebp;
+   UInt m_esp;
+   UInt m_eflags;
+   UInt m_eip;
+   UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+
+   UInt sh_eax;
+   UInt sh_ebx;
+   UInt sh_ecx;
+   UInt sh_edx;
+   UInt sh_esi;
+   UInt sh_edi;
+   UInt sh_ebp;
+   UInt sh_esp;
+   UInt sh_eflags;
+};
 
 
 /* The thread table. */
@@ -753,10 +603,6 @@
 /* Similarly ... */
 extern ThreadId VG_(get_current_tid) ( void );
 
-/* Which thread is this address in the stack of, if any?  Used for
-   error message generation. */
-extern ThreadId VG_(identify_stack_addr)( Addr a );
-
 /* Nuke all threads except tid. */
 extern void VG_(nuke_all_threads_except) ( ThreadId me );
 
@@ -795,12 +641,14 @@
    the initial stack, which we can't move, is allocated here.
    VG_(scheduler_init) checks this.  Andrea Archelangi's 2.4 kernels
    have been rumoured to start stacks at 0x80000000, so that too is
-   considered. It seems systems with longer uptimes tend to to use
-   stacks which start at 0x40000000 sometimes.  
-*/
+   considered.  It seems systems with longer uptimes tend to use
+   stacks which start at 0x40000000 sometimes.  JRS 2002-Aug-21: I
+   also have reports of stacks starting at 0xE0000000. */
+
 #define VG_STARTUP_STACK_BASE_1  (Addr)0xC0000000
 #define VG_STARTUP_STACK_BASE_2  (Addr)0x80000000
 #define VG_STARTUP_STACK_BASE_3  (Addr)0x40000000
+#define VG_STARTUP_STACK_BASE_4  (Addr)0xE0000000
 #define VG_STARTUP_STACK_SMALLERTHAN  0x100000 /* 1024k */
 
 #define VG_STACK_MATCHES_BASE(zzstack, zzbase)                 \
@@ -819,17 +667,24 @@
 #define VG_AR_CLIENT_STACKBASE_REDZONE_SZB \
    (VG_AR_CLIENT_STACKBASE_REDZONE_SZW * VKI_BYTES_PER_WORD)
 
+/* Junk to fill up a thread's shadow regs with when shadow regs aren't
+ * being used. */
+#define VG_UNUSED_SHADOW_REG_VALUE  0x27182818
+
+/* What we set a shadow register to when written by SET_EAX and similar
+ * things. */
+extern UInt VG_(written_shadow_reg);
 
 /* Write a value to the client's %EDX (request return value register)
    and set the shadow to indicate it is defined. */
-#define SET_EDX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_edx = (zzval);             \
-        VG_(threads)[zztid].sh_edx = VGM_WORD_VALID;     \
+#define SET_EDX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_edx = (zzval);                   \
+        VG_(threads)[zztid].sh_edx = VG_(written_shadow_reg);  \
    } while (0)
 
-#define SET_EAX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_eax = (zzval);             \
-        VG_(threads)[zztid].sh_eax = VGM_WORD_VALID;     \
+#define SET_EAX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_eax = (zzval);                   \
+        VG_(threads)[zztid].sh_eax = VG_(written_shadow_reg);  \
    } while (0)
 
 
@@ -875,87 +730,21 @@
    Exports of vg_mylibc.c
    ------------------------------------------------------------------ */
 
+__attribute__((noreturn))
+extern void VG_(skin_error) ( Char* s );
 
-#if !defined(NULL)
-#  define NULL ((void*)0)
-#endif
+/* VG_(brk) not public so skins cannot screw with curr_dataseg_end */
+extern void* VG_(brk) ( void* end_data_segment );
 
-extern void VG_(exit)( Int status )
-            __attribute__ ((__noreturn__));
+/* Skins use VG_(strdup)() which doesn't expose ArenaId */
+extern Char* VG_(arena_strdup) ( ArenaId aid, const Char* s);
 
-extern void VG_(printf) ( const char *format, ... );
-/* too noisy ...  __attribute__ ((format (printf, 1, 2))) ; */
-
-extern void VG_(sprintf) ( Char* buf, Char *format, ... );
-
-extern void VG_(vprintf) ( void(*send)(Char), 
-                          const Char *format, va_list vargs );
-
-extern Bool VG_(isspace) ( Char c );
-extern Bool VG_(isdigit) ( Char c );
-
-extern Int VG_(strlen) ( const Char* str );
-
-extern Long VG_(atoll) ( Char* str );
-extern Long VG_(atoll36) ( Char* str );
-
-extern Char* VG_(strcat) ( Char* dest, const Char* src );
-extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n );
-extern Char* VG_(strpbrk) ( const Char* s, const Char* accept );
-
-extern Char* VG_(strcpy) ( Char* dest, const Char* src );
-
-extern Int VG_(strcmp)    ( const Char* s1, const Char* s2 );
-extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 );
-
-extern Int VG_(strncmp)    ( const Char* s1, const Char* s2, Int nmax );
-extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax );
-
-extern Char* VG_(strstr) ( const Char* haystack, Char* needle );
-extern Char* VG_(strchr) ( const Char* s, Char c );
-extern Char* VG_(strdup) ( ArenaId aid, const Char* s);
-
-extern Char* VG_(getenv) ( Char* name );
-extern Int   VG_(getpid) ( void );
-
+/* Skins shouldn't need these...(?) */
 extern void VG_(start_rdtsc_calibration) ( void );
 extern void VG_(end_rdtsc_calibration) ( void );
 extern UInt VG_(read_millisecond_timer) ( void );
 
-
-extern Char VG_(toupper) ( Char c );
-
-extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest );
-
-extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest );
-
-extern Bool VG_(stringMatch) ( Char* pat, Char* str );
-
-
-#define VG__STRING(__str)  #__str
-
-/* Asserts are permanently enabled.  Hurrah! */
-#define vg_assert(expr)                                               \
-  ((void) ((expr) ? 0 :						      \
-	   (VG_(assert_fail) (VG__STRING(expr),			      \
-			      __FILE__, __LINE__,                     \
-                              __PRETTY_FUNCTION__), 0)))
-
-extern void VG_(assert_fail) ( Char* expr, Char* file, 
-                               Int line, Char* fn )
-            __attribute__ ((__noreturn__));
-
-/* Reading and writing files. */
-extern Int  VG_(open_read) ( Char* pathname );
-extern Int  VG_(open_write)       ( Char* pathname );
-extern Int  VG_(create_and_write) ( Char* pathname );
-extern void VG_(close)     ( Int fd );
-extern Int  VG_(read)      ( Int fd, void* buf, Int count);
-extern Int  VG_(write)     ( Int fd, void* buf, Int count);
-extern Int  VG_(stat) ( Char* file_name, struct vki_stat* buf );
-
-extern Int  VG_(fcntl) ( Int fd, Int cmd, Int arg );
-
+extern Int VG_(fcntl) ( Int fd, Int cmd, Int arg );
 extern Int VG_(select)( Int n, 
                         vki_fd_set* readfds, 
                         vki_fd_set* writefds, 
@@ -964,306 +753,37 @@
 extern Int VG_(nanosleep)( const struct vki_timespec *req, 
                            struct vki_timespec *rem );
 
-
-/* mmap-ery ... */
-extern void* VG_(mmap)( void* start, UInt length, 
-                        UInt prot, UInt flags, UInt fd, UInt offset );
-
-extern Int  VG_(munmap)( void* start, Int length );
-
-extern void* VG_(brk) ( void* end_data_segment );
-
-
-/* Print a (panic) message, and abort. */
-extern void VG_(panic) ( Char* str )
-            __attribute__ ((__noreturn__));
-
-/* Get memory by anonymous mmap. */
-extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
-
-/* Crude stand-in for the glibc system() call. */
-extern Int VG_(system) ( Char* cmd );
-
-
-/* Signal stuff.  Note that these use the vk_ (kernel) structure
-   definitions, which are different in places from those that glibc
-   defines.  Since we're operating right at the kernel interface,
-   glibc's view of the world is entirely irrelevant. */
-
-/* --- Signal set ops --- */
-extern Int  VG_(ksigfillset)( vki_ksigset_t* set );
-extern Int  VG_(ksigemptyset)( vki_ksigset_t* set );
-
-extern Bool VG_(kisfullsigset)( vki_ksigset_t* set );
-extern Bool VG_(kisemptysigset)( vki_ksigset_t* set );
-
-extern Int  VG_(ksigaddset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigdelset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigismember) ( vki_ksigset_t* set, Int signum );
-
-extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-
-/* --- Mess with the kernel's sig state --- */
-extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, 
-                                       vki_ksigset_t* oldset );
-extern Int VG_(ksigaction) ( Int signum,  
-                             const vki_ksigaction* act,  
-                             vki_ksigaction* oldact );
-
-extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int));
-
-extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss );
-
-extern Int VG_(kill)( Int pid, Int signo );
-extern Int VG_(sigpending) ( vki_ksigset_t* set );
-
-
 /* ---------------------------------------------------------------------
    Definitions for the JITter (vg_translate.c, vg_to_ucode.c,
    vg_from_ucode.c).
    ------------------------------------------------------------------ */
 
-/* Tags which describe what operands are. */
-typedef
-   enum { TempReg=0, ArchReg=1, RealReg=2, 
-          SpillNo=3, Literal=4, Lit16=5, 
-          NoValue=6 }
-   Tag;
-
-
-/* Microinstruction opcodes. */
-typedef
-   enum {
-      NOP,
-      GET,
-      PUT,
-      LOAD,
-      STORE,
-      MOV,
-      CMOV, /* Used for cmpxchg and cmov */
-      WIDEN,
-      JMP,
-
-      /* Read/write the %EFLAGS register into a TempReg. */
-      GETF, PUTF,
-
-      ADD, ADC, AND, OR,  XOR, SUB, SBB,
-      SHL, SHR, SAR, ROL, ROR, RCL, RCR,
-      NOT, NEG, INC, DEC, BSWAP,
-      CC2VAL,
-
-      /* Not strictly needed, but useful for making better
-         translations of address calculations. */
-      LEA1,  /* reg2 := const + reg1 */
-      LEA2,  /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */
-
-      /* not for translating x86 calls -- only to call helpers */
-      CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences
-                           for CALLM. */
-      PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */
-      CALLM,  /* call to a machine-code helper */
-
-      /* for calling C functions -- CCALL_M_N passes M arguments and returns N
-       * (0 or 1) return values */
-      CCALL_1_0, CCALL_2_0,
-
-      /* Hack for translating string (REP-) insns.  Jump to literal if
-         TempReg/RealReg is zero. */
-      JIFZ,
-
-      /* FPU ops which read/write mem or don't touch mem at all. */
-      FPU_R,
-      FPU_W,
-      FPU,
-
-      /* Advance the simulated %eip by some small (< 128) number. */
-      INCEIP,
-
-      /* uinstrs which are not needed for mere translation of x86 code,
-         only for instrumentation of it. */
-      LOADV,
-      STOREV,
-      GETV,
-      PUTV,
-      TESTV,
-      SETV,
-      /* Get/set the v-bit (and it is only one bit) for the simulated
-         %eflags register. */
-      GETVF,
-      PUTVF,
-
-      /* Do a unary or binary tag op.  Only for post-instrumented
-         code.  For TAG1, first and only arg is a TempReg, and is both
-         arg and result reg.  For TAG2, first arg is src, second is
-         dst, in the normal way; both are TempRegs.  In both cases,
-         3rd arg is a RiCHelper with a Lit16 tag.  This indicates
-         which tag op to do. */
-      TAG1,
-      TAG2
-   }
-   Opcode;
-
-
-/* Condition codes, observing the Intel encoding.  CondAlways is an
-   extra. */
-typedef
-   enum {
-      CondO      = 0,  /* overflow           */
-      CondNO     = 1,  /* no overflow        */
-      CondB      = 2,  /* below              */
-      CondNB     = 3,  /* not below          */
-      CondZ      = 4,  /* zero               */
-      CondNZ     = 5,  /* not zero           */
-      CondBE     = 6,  /* below or equal     */
-      CondNBE    = 7,  /* not below or equal */
-      CondS      = 8,  /* negative           */
-      ConsNS     = 9,  /* not negative       */
-      CondP      = 10, /* parity even        */
-      CondNP     = 11, /* not parity even    */
-      CondL      = 12, /* jump less          */
-      CondNL     = 13, /* not less           */
-      CondLE     = 14, /* less or equal      */
-      CondNLE    = 15, /* not less or equal  */
-      CondAlways = 16  /* Jump always        */
-   } 
-   Condcode;
-
-
-/* Descriptions of additional properties of *unconditional* jumps. */
-typedef
-   enum {
-     JmpBoring=0,   /* boring unconditional jump */
-     JmpCall=1,     /* jump due to an x86 call insn */
-     JmpRet=2,      /* jump due to an x86 ret insn */
-     JmpSyscall=3,  /* do a system call, then jump */
-     JmpClientReq=4 /* do a client request, then jump */
-   }
-   JmpKind;
-
-
-/* Flags.  User-level code can only read/write O(verflow), S(ign),
-   Z(ero), A(ux-carry), C(arry), P(arity), and may also write
-   D(irection).  That's a total of 7 flags.  A FlagSet is a bitset,
-   thusly: 
-      76543210
-       DOSZACP
-   and bit 7 must always be zero since it is unused.
-*/
-typedef UChar FlagSet;
-
-#define FlagD (1<<6)
-#define FlagO (1<<5)
-#define FlagS (1<<4)
-#define FlagZ (1<<3)
-#define FlagA (1<<2)
-#define FlagC (1<<1)
-#define FlagP (1<<0)
-
-#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsOSZAP  (FlagO | FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsOSZCP  (FlagO | FlagS | FlagZ |         FlagC | FlagP)
-#define FlagsOSACP  (FlagO | FlagS |         FlagA | FlagC | FlagP)
-#define FlagsSZACP  (        FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsSZAP   (        FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsZCP    (                FlagZ         | FlagC | FlagP)
-#define FlagsOC     (FlagO |                         FlagC        )
-#define FlagsAC     (                        FlagA | FlagC        )
-
-#define FlagsALL    (FlagsOSZACP | FlagD)
-#define FlagsEmpty  (FlagSet)0
-
 #define VG_IS_FLAG_SUBSET(set1,set2) \
    (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) )
 
 #define VG_UNION_FLAG_SETS(set1,set2) \
    ( ((FlagSet)set1) | ((FlagSet)set2) )
 
-
-
-/* A Micro (u)-instruction. */
-typedef
-   struct {
-      /* word 1 */
-      UInt    lit32;      /* 32-bit literal */
-
-      /* word 2 */
-      UShort  val1;       /* first operand */
-      UShort  val2;       /* second operand */
-
-      /* word 3 */
-      UShort  val3;       /* third operand */
-      UChar   opcode;     /* opcode */
-      UChar   size;       /* data transfer size */
-
-      /* word 4 */
-      FlagSet flags_r;    /* :: FlagSet */
-      FlagSet flags_w;    /* :: FlagSet */
-      UChar   tag1:4;     /* first  operand tag */
-      UChar   tag2:4;     /* second operand tag */
-      UChar   tag3:4;     /* third  operand tag */
-      UChar   extra4b:4;  /* Spare field, used by WIDEN for src
-                             -size, and by LEA2 for scale 
-                             (1,2,4 or 8), and by unconditional JMPs for
-                             orig x86 instr size if --cachesim=yes */
-
-
-      /* word 5 */
-      UChar   cond;            /* condition, for jumps */
-      Bool    smc_check:1;     /* do a smc test, if writes memory. */
-      Bool    signed_widen:1;  /* signed or unsigned WIDEN ? */
-      JmpKind jmpkind:3;       /* additional properties of unconditional JMP */
-   }
-   UInstr;
-
-
-/* Expandable arrays of uinstrs. */
-typedef 
-   struct { 
-      Int     used; 
-      Int     size; 
-      UInstr* instrs;
-      Int     nextTemp;
-   }
-   UCodeBlock;
-
-/* Refer to `the last instruction stuffed in', including as an
-   lvalue. */
-#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1]
-
-/* An invalid temporary number :-) */
-#define INVALID_TEMPREG 999999999
-
-
 /* ---------------------------------------------------------------------
    Exports of vg_demangle.c
    ------------------------------------------------------------------ */
 
 extern void VG_(demangle) ( Char* orig, Char* result, Int result_size );
 
-
 /* ---------------------------------------------------------------------
    Exports of vg_from_ucode.c
    ------------------------------------------------------------------ */
 
 extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes );
 
+extern void   VG_(print_ccall_stats)      ( void );
+extern void   VG_(print_UInstr_histogram) ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_to_ucode.c
    ------------------------------------------------------------------ */
 
 extern Int   VG_(disBB)          ( UCodeBlock* cb, Addr eip0 );
-extern Char* VG_(nameOfIntReg)   ( Int size, Int reg );
-extern Char  VG_(nameOfIntSize)  ( Int size );
-extern UInt  VG_(extend_s_8to32) ( UInt x );
-extern Int   VG_(getNewTemp)     ( UCodeBlock* cb );
-extern Int   VG_(getNewShadow)   ( UCodeBlock* cb );
-
-#define SHADOW(tempreg)  ((tempreg)+1)
-
 
 /* ---------------------------------------------------------------------
    Exports of vg_translate.c
@@ -1275,41 +795,11 @@
                                Addr* trans_addr,
                                UInt* trans_size );
 
-extern void  VG_(emptyUInstr) ( UInstr* u );
-extern void  VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz );
-extern void  VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1 );
-extern void  VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2 );
-extern void  VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2,
-                               Tag tag3, UInt val3 );
-extern void VG_(setFlagRW) ( UInstr* u, 
-                             FlagSet fr, FlagSet fw );
-
-extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
-extern Bool VG_(anyFlagUse) ( UInstr* u );
-
-
-
-extern void  VG_(ppUInstr)        ( Int instrNo, UInstr* u );
-extern void  VG_(ppUCodeBlock)    ( UCodeBlock* cb, Char* title );
-
-extern UCodeBlock* VG_(allocCodeBlock) ( void );
-extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
-extern void  VG_(copyUInstr)                ( UCodeBlock* cb, UInstr* instr );
-
-extern Char* VG_(nameCondcode)    ( Condcode cond );
-extern Bool  VG_(saneUInstr)      ( Bool beforeRA, UInstr* u );
-extern Bool  VG_(saneUCodeBlock)  ( UCodeBlock* cb );
-extern Char* VG_(nameUOpcode)     ( Bool upper, Opcode opc );
-extern Int   VG_(rankToRealRegNo) ( Int rank );
-
-extern void* VG_(jitmalloc) ( Int nbytes );
-extern void  VG_(jitfree)   ( void* ptr );
-
+extern Char* VG_(nameCondcode)        ( Condcode cond );
+extern Bool  VG_(saneUInstr)          ( Bool beforeRA, Bool beforeLiveness,
+                                        UInstr* u );
+extern void  VG_(saneUCodeBlock)      ( UCodeBlock* cb );
+extern Bool  VG_(saneUCodeBlockCalls) ( UCodeBlock* cb );
 
 /* ---------------------------------------------------------------------
    Exports of vg_execontext.c.
@@ -1320,15 +810,13 @@
    comparing against suppression specifications.  The rest are purely
    informational (but often important). */
 
-typedef
-   struct _ExeContextRec {
-      struct _ExeContextRec * next;
-      /* The size of this array is VG_(clo_backtrace_size); at least
-         2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
-         [1] is its caller, [2] is the caller of [1], etc. */
-      Addr eips[0];
-   }
-   ExeContext;
+struct _ExeContext {
+   struct _ExeContext * next;
+   /* Variable-length array.  The size is VG_(clo_backtrace_size); at
+      least 2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
+      [1] is its caller, [2] is the caller of [1], etc. */
+   Addr eips[0];
+};
 
 
 /* Initialise the ExeContext storage mechanism. */
@@ -1337,91 +825,86 @@
 /* Print stats (informational only). */
 extern void VG_(show_ExeContext_stats) ( void );
 
-
-/* Take a snapshot of the client's stack.  Search our collection of
-   ExeContexts to see if we already have it, and if not, allocate a
-   new one.  Either way, return a pointer to the context. */
-extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                         Addr eip, Addr ebp );
-
-/* Print an ExeContext. */
-extern void VG_(pp_ExeContext) ( ExeContext* );
-
-/* Compare two ExeContexts, just comparing the top two callers. */
-extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, just comparing the top four callers. */
-extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, comparing all callers. */
-extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 );
-
+/* Like VG_(get_ExeContext), but with a slightly different type */
+extern ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                          Addr ebp_min, Addr ebp_max );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_errcontext.c.
    ------------------------------------------------------------------ */
 
-extern void VG_(load_suppressions)    ( void );
-extern void VG_(show_all_errors)      ( void );
-extern void VG_(record_value_error)   ( Int size );
-extern void VG_(record_free_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_freemismatch_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_address_error) ( Addr a, Int size, 
-                                        Bool isWrite );
-
-extern void VG_(record_jump_error) ( ThreadState* tst, Addr a );
-
-extern void VG_(record_param_err) ( ThreadState* tst,
-                                    Addr a, 
-                                    Bool isWriteLack, 
-                                    Char* msg );
-extern void VG_(record_user_err) ( ThreadState* tst,
-                                   Addr a, Bool isWriteLack );
-extern void VG_(record_pthread_err) ( ThreadId tid, Char* msg );
-
-
-
-/* The classification of a faulting address. */
-typedef 
-   enum { Undescribed, /* as-yet unclassified */
-          Stack, 
-          Unknown, /* classification yielded nothing useful */
-          Freed, Mallocd, 
-          UserG, UserS }
-   AddrKind;
-
-/* Records info about a faulting address. */
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
 typedef
-   struct {
-      /* ALL */
-      AddrKind akind;
-      /* Freed, Mallocd */
-      Int blksize;
-      /* Freed, Mallocd */
-      Int rwoffset;
-      /* Freed, Mallocd */
-      ExeContext* lastchange;
-      /* Stack */
-      ThreadId stack_tid;
-      /* True if is just-below %esp -- could be a gcc bug. */
-      Bool maybe_gcc;
+   enum {
+      PThreadSupp = -1,    /* Matches PThreadErr */
    }
-   AddrInfo;
+   CoreSuppKind;
+
+/* For each caller specified for a suppression, record the nature of
+   the caller name.  Not of interest to skins. */
+typedef
+   enum { 
+      ObjName,    /* Name is of a shared object file. */
+      FunName     /* Name is of a function. */
+   }
+   SuppLocTy;
+
+/* Suppressions.  Skin part `SkinSupp' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreSupp {
+      struct _CoreSupp* next;
+      /* The number of times this error has been suppressed. */
+      Int count;
+      /* The name by which the suppression is referred to. */
+      Char* sname;
+      /* First two (name of fn where err occurs, and immediate caller)
+       * are mandatory;  extra two are optional. */
+      SuppLocTy caller_ty[VG_N_SUPP_CALLERS];
+      Char*     caller   [VG_N_SUPP_CALLERS];
+      /* The skin-specific part */
+      SkinSupp  skin_supp;
+   } 
+   CoreSupp;
+
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
+typedef
+   enum { 
+      PThreadErr      = -1,   /* Pthreading error */
+   }
+   CoreErrorKind;
+
+/* Errors.  Skin part `SkinError' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreErrContext {
+      struct _CoreErrContext* next;
+      /* NULL if unsuppressed; or ptr to suppression record. */
+      CoreSupp* supp;
+      Int count;
+      ExeContext* where;
+      ThreadId tid;
+      /* These record %EIP, %ESP and %EBP at the error point.  They
+         are only used to make GDB-attaching convenient; there is no
+         other purpose; specifically they are not used to do
+         comparisons between errors. */
+      UInt m_eip;
+      UInt m_esp;
+      UInt m_ebp;
+      /* The skin-specific part */
+      SkinError skin_err;
+   } 
+   CoreError;
 
 
-/* ---------------------------------------------------------------------
-   Exports of vg_clientperms.c
-   ------------------------------------------------------------------ */
+extern void VG_(load_suppressions)    ( void );
 
-extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );
+extern void VG_(record_pthread_error) ( ThreadId tid, Char* msg );
 
-extern UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block );
-
-extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void );
-
-extern void VG_(show_client_block_stats) ( void );
-
+extern void VG_(show_all_errors)      ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_procselfmaps.c
@@ -1438,52 +921,26 @@
    ------------------------------------------------------------------ */
 
 /* We assume the executable is loaded here ... can't really find
-   out.  There is a hacky sanity check in vg_init_memory_audit()
+   out.  There is a hacky sanity check in VG_(init_memory)()
    which should trip up most stupidities.
 */
 #define VG_ASSUMED_EXE_BASE  (Addr)0x8048000
 
-extern void VG_(read_symbols) ( void );
-extern void VG_(mini_stack_dump) ( ExeContext* ec );
-extern void VG_(what_obj_and_fun_is_this)
-                                     ( Addr a,
-                                       Char* obj_buf, Int n_obj_buf,
-                                       Char* fun_buf, Int n_fun_buf );
-extern Bool VG_(what_line_is_this) ( Addr a,
-                                     UChar* filename, Int n_filename,
-                                     UInt* lineno );
-extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
-                                     Char* fn_name, Int n_fn_name);
+extern void VG_(maybe_read_symbols)   ( void );
+extern void VG_(read_symtab_callback) ( Addr start, UInt size, 
+                                        Char rr, Char ww, Char xx,
+                                        UInt foffset, UChar* filename );
+extern void VG_(maybe_unload_symbols) ( Addr start, UInt length );
 
-extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(get_fnname_nodemangle)( Addr a, Char* fnname, Int n_fnname );
+extern void VG_(mini_stack_dump)      ( ExeContext* ec );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_clientmalloc.c
    ------------------------------------------------------------------ */
 
-typedef
-   enum { 
-      Vg_AllocMalloc = 0,
-      Vg_AllocNew    = 1,
-      Vg_AllocNewVec = 2 
-   }
-   VgAllocKind;
-
-/* Description of a malloc'd chunk. */
-typedef 
-   struct _ShadowChunk {
-      struct _ShadowChunk* next;
-      ExeContext*   where;          /* where malloc'd/free'd */
-      UInt          size : 30;      /* size requested.       */
-      VgAllocKind   allockind : 2;  /* which wrapper did the allocation */
-      Addr          data;           /* ptr to actual block.  */
-   } 
-   ShadowChunk;
-
-extern void          VG_(clientmalloc_done) ( void );
-extern void          VG_(describe_addr) ( Addr a, AddrInfo* ai );
-extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows );
+extern void  VG_(client_malloc_init)();
 
 /* These are called from the scheduler, when it intercepts a user
    request. */
@@ -1503,11 +960,14 @@
    Exports of vg_main.c
    ------------------------------------------------------------------ */
 
+/* Sanity checks which may be done at any time.  The scheduler decides when. */
+extern void VG_(do_sanity_checks) ( Bool force_expensive );
+
 /* A structure used as an intermediary when passing the simulated
    CPU's state to some assembly fragments, particularly system calls.
    Stuff is copied from baseBlock to here, the assembly magic runs,
-   and then the inverse copy is done. */
-
+   and then the inverse copy is done. 
+ */
 extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ 
                                  + 1 /* %eflags */ 
                                  + 1 /* %eip */
@@ -1520,30 +980,27 @@
 
 /* Called when some unhandleable client behaviour is detected.
    Prints a msg and aborts. */
-extern void VG_(unimplemented) ( Char* msg );
+extern void VG_(unimplemented) ( Char* msg )
+            __attribute__((__noreturn__));
 extern void VG_(nvidia_moan) ( void );
 
 /* The stack on which Valgrind runs.  We can't use the same stack as the
    simulatee -- that's an important design decision.  */
 extern UInt VG_(stack)[10000];
 
-/* Similarly, we have to ask for signals to be delivered on an
-   alternative stack, since it is possible, although unlikely, that
-   we'll have to run client code from inside the Valgrind-installed
-   signal handler.  If this happens it will be done by
-   vg_deliver_signal_immediately(). */
+/* Similarly, we have to ask for signals to be delivered on an alternative
+   stack, since it is possible, although unlikely, that we'll have to run
+   client code from inside the Valgrind-installed signal handler.  If this
+   happens it will be done by vg_deliver_signal_immediately(). */
 extern UInt VG_(sigstack)[10000];
 
 /* Holds client's %esp at the point we gained control.  From this the
    client's argc, argv and envp are deduced. */
 extern Addr   VG_(esp_at_startup);
-extern Int    VG_(client_argc);
-extern Char** VG_(client_argv);
-extern Char** VG_(client_envp);
 
-/* Remove valgrind.so from a LD_PRELOAD=... string so child processes
-   don't get traced into.  Also mess up $libdir/valgrind so that our
-   libpthread.so disappears from view. */
+/* Remove valgrind.so and skin's .so from a LD_PRELOAD=... string so child
+   processes don't get traced into.  Also mess up $libdir/valgrind so that
+   our libpthread.so disappears from view. */
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str );
 
@@ -1553,9 +1010,6 @@
    the client program really was running on the real cpu. */
 extern void VG_(start_GDB_whilst_on_client_stack) ( void );
 
-/* Spew out vast amounts of junk during JITting? */
-extern Bool  VG_(disassemble);
-
 /* 64-bit counter for the number of basic blocks done. */
 extern ULong VG_(bbs_done);
 /* 64-bit counter for the number of bbs to go before a debug exit. */
@@ -1573,6 +1027,11 @@
 /* This is the ThreadId of the last thread the scheduler ran. */
 extern ThreadId VG_(last_run_tid);
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+extern UInt VG_(exitcode);
+
 
 /* --- Counters, for informational purposes only. --- */
 
@@ -1628,83 +1087,38 @@
    Exports of vg_memory.c
    ------------------------------------------------------------------ */
 
-extern void VGM_(init_memory_audit) ( void );
-extern Addr VGM_(curr_dataseg_end);
-extern void VG_(show_reg_tags) ( void );
-extern void VG_(detect_memory_leaks) ( void );
-extern void VG_(done_prof_mem) ( void );
+extern void VG_(init_memory)            ( void );
+extern void VG_(new_exe_segment)        ( Addr a, UInt len );
+extern void VG_(remove_if_exe_segment)  ( Addr a, UInt len );
 
-/* Set permissions for an address range.  Not speed-critical. */
-extern void VGM_(make_noaccess) ( Addr a, UInt len );
-extern void VGM_(make_writable) ( Addr a, UInt len );
-extern void VGM_(make_readable) ( Addr a, UInt len );
-/* Use with care! (read: use for shmat only) */
-extern void VGM_(make_readwritable) ( Addr a, UInt len );
-extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst,
-                                             UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-/* Sanity checks which may be done at any time.  The scheduler decides
-   when. */
-extern void VG_(do_sanity_checks) ( Bool force_expensive );
-/* Very cheap ... */
-extern Bool VG_(first_and_last_secondaries_look_plausible) ( void );
-
-/* These functions are called from generated code. */
-extern void VG_(helperc_STOREV4) ( UInt, Addr );
-extern void VG_(helperc_STOREV2) ( UInt, Addr );
-extern void VG_(helperc_STOREV1) ( UInt, Addr );
-
-extern UInt VG_(helperc_LOADV1) ( Addr );
-extern UInt VG_(helperc_LOADV2) ( Addr );
-extern UInt VG_(helperc_LOADV4) ( Addr );
-
-extern void VGM_(handle_esp_assignment) ( Addr new_espA );
-extern void VGM_(fpu_write_check) ( Addr addr, Int size );
-extern void VGM_(fpu_read_check)  ( Addr addr, Int size );
-
-/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
-   space and pass the addresses and values of all addressible,
-   defined, aligned words to notify_word.  This is the basis for the
-   leak detector.  Returns the number of calls made to notify_word.  */
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) );
-
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-extern Bool VG_(is_just_below_ESP)( Addr esp, Addr aa );
+/* Called from generated code. */
+extern void VG_(handle_esp_assignment) ( Addr new_espA );
 
 /* Nasty kludgery to deal with applications which switch stacks,
    like netscape. */
 #define VG_PLAUSIBLE_STACK_SIZE 8000000
 
-/* Needed by the pthreads implementation. */
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-
-
 /* ---------------------------------------------------------------------
-   Exports of vg_syscall_mem.c
+   Exports of vg_syscalls.c
    ------------------------------------------------------------------ */
 
+extern void VG_(init_dataseg_end_for_brk) ( void );
+
 extern void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid );
 
-extern void VG_(check_known_blocking_syscall) ( ThreadId tid, 
-                                                Int syscallno,
-                                                Int* /*IN*/ res );
+extern void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno );
+extern void  VG_(post_known_blocking_syscall)( ThreadId tid, Int syscallno,
+                                               void* pre_res, Int res );
 
 extern Bool VG_(is_kerror) ( Int res );
 
-#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)        \
-         VG_(load_thread_state)(thread_id);                \
-         VG_(copy_baseBlock_to_m_state_static)();          \
-         VG_(do_syscall)();                                \
-         VG_(copy_m_state_static_to_baseBlock)();          \
-         VG_(save_thread_state)(thread_id);                \
-         VG_(threads)[thread_id].sh_eax = VGM_WORD_VALID;  \
+#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)               \
+         VG_(load_thread_state)(thread_id);                       \
+         VG_(copy_baseBlock_to_m_state_static)();                 \
+         VG_(do_syscall)();                                       \
+         VG_(copy_m_state_static_to_baseBlock)();                 \
+         VG_(save_thread_state)(thread_id);                       \
+         VG_(threads)[thread_id].sh_eax = VG_(written_shadow_reg);\
          result_lvalue = VG_(threads)[thread_id].m_eax;
 
 
@@ -1726,6 +1140,9 @@
 /* The number of basic blocks in an epoch (one age-step). */
 #define VG_BBS_PER_EPOCH 20000
 
+/* The fast-cache for tt-lookup. */
+extern Addr VG_(tt_fast)[VG_TT_FAST_SIZE];
+
 extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used );
 extern void VG_(maybe_do_lru_pass) ( void );
 extern void VG_(flush_transtab) ( void );
@@ -1742,40 +1159,6 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_vtagops.c
-   ------------------------------------------------------------------ */
-
-/* Lists the names of value-tag operations used in instrumented
-   code.  These are the third argument to TAG1 and TAG2 uinsns. */
-
-typedef
-   enum { 
-     /* Unary. */
-     VgT_PCast40, VgT_PCast20, VgT_PCast10,
-     VgT_PCast01, VgT_PCast02, VgT_PCast04,
-
-     VgT_PCast14, VgT_PCast12, VgT_PCast11,
-
-     VgT_Left4, VgT_Left2, VgT_Left1,
-
-     VgT_SWiden14, VgT_SWiden24, VgT_SWiden12,
-     VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12,
-
-     /* Binary; 1st is rd; 2nd is rd+wr */
-     VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0,
-     VgT_DifD4, VgT_DifD2, VgT_DifD1,
-
-     VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, 
-     VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ,
-     VgT_DebugFn
-   }
-   VgTagOp;
-
-extern Char* VG_(nameOfTagOp) ( VgTagOp );
-extern UInt VG_(DebugFn) ( UInt a1, UInt a2 );
-
-
-/* ---------------------------------------------------------------------
    Exports of vg_syscall.S
    ------------------------------------------------------------------ */
 
@@ -1844,60 +1227,24 @@
 extern void VG_(helper_DAS);
 extern void VG_(helper_DAA);
 
-extern void VG_(helper_value_check4_fail);
-extern void VG_(helper_value_check2_fail);
-extern void VG_(helper_value_check1_fail);
-extern void VG_(helper_value_check0_fail);
-
 /* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
 extern void VG_(signalreturn_bogusRA)( void );
 
-
 /* ---------------------------------------------------------------------
-   Exports of vg_cachesim.c
+   Things relating to the used skin
    ------------------------------------------------------------------ */
 
-extern Int VG_(log2) ( Int x );
-
-extern UCodeBlock* VG_(cachesim_instrument) ( UCodeBlock* cb_in, 
-                                              Addr orig_addr );
-
-typedef struct  _iCC  iCC;
-typedef struct _idCC idCC;
-
-extern void VG_(init_cachesim)      ( void );
-extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv );
-
-extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
-extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
-
-extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+#define VG_TRACK(fn, args...)          \
+   do {                                \
+      if (VG_(track_events).fn)        \
+         VG_(track_events).fn(args);   \
+   } while (0)
 
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
 
-/* This is the Intel register encoding. */
-#define R_EAX 0
-#define R_ECX 1
-#define R_EDX 2
-#define R_EBX 3
-#define R_ESP 4
-#define R_EBP 5
-#define R_ESI 6
-#define R_EDI 7
-
-#define R_AL (0+R_EAX)
-#define R_CL (0+R_ECX)
-#define R_DL (0+R_EDX)
-#define R_BL (0+R_EBX)
-#define R_AH (4+R_EAX)
-#define R_CH (4+R_ECX)
-#define R_DH (4+R_EDX)
-#define R_BH (4+R_EBX)
-
-
 /* ---------------------------------------------------------------------
    Offsets into baseBlock for everything which needs to referred to
    from generated code.  The order of these decls does not imply 
@@ -1948,7 +1295,6 @@
 extern Int VGOFF_(sh_edi);
 extern Int VGOFF_(sh_eflags);
 
-
 /* -----------------------------------------------------
    Read-only parts of baseBlock.
    -------------------------------------------------- */
@@ -1993,25 +1339,22 @@
 extern Int VGOFF_(helper_DAS);
 extern Int VGOFF_(helper_DAA);
 
-extern Int VGOFF_(helper_value_check4_fail);
-extern Int VGOFF_(helper_value_check2_fail);
-extern Int VGOFF_(helper_value_check1_fail);
-extern Int VGOFF_(helper_value_check0_fail);
-
-extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */
-
-extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */
-
 extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */
-extern Int VGOFF_(fpu_write_check);       /* :: Addr -> Int -> void */
-extern Int VGOFF_(fpu_read_check);        /* :: Addr -> Int -> void */
 
-extern Int VGOFF_(cachesim_log_non_mem_instr);
-extern Int VGOFF_(cachesim_log_mem_instr);
+/* For storing extension-specific helpers, determined at runtime.  The addr 
+ * and offset arrays together form a (addr, offset) map that allows a 
+ * helper's baseBlock offset to be computed from its address.  It's done 
+ * like this so CCALL_M_Ns and other helper calls can use the function 
+ * address rather than having to muck around with offsets. */
+extern UInt VG_(n_compact_helpers);
+extern UInt VG_(n_noncompact_helpers);
+
+extern Addr VG_(compact_helper_addrs)  [];
+extern Int  VG_(compact_helper_offsets)[];
+
+extern Addr VG_(noncompact_helper_addrs)  [];
+extern Int  VG_(noncompact_helper_offsets)[];
+
 
 #endif /* ndef __VG_INCLUDE_H */
 
diff --git a/coregrind/vg_instrument.c b/coregrind/vg_instrument.c
new file mode 100644
index 0000000..9a062ee
--- /dev/null
+++ b/coregrind/vg_instrument.c
@@ -0,0 +1,96 @@
+/*--------------------------------------------------------------------*/
+/*--- Higher-level UCode sequence builders                         ---*/
+/*---                                              vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+// SSS: should this file eventually not be in core, but included in
+// skins that use it??  Reduces size of core, but increases size of every
+// skin that uses it...
+
+/* We only import vg_skin.h here, because this file only provides functions
+   for doing things that could be done directly by the skin -- it's just to
+   make skins' lives easier, rather than let them do something they
+   couldn't otherwise do. */
+#include "vg_skin.h"
+
+#define uInstr0   VG_(newUInstr0)
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+
+
+void VG_(callHelper_0_0)(UCodeBlock* cb, Addr f)
+{
+   uInstr0(cb, CCALL, 0);
+   uCCall(cb, f, 0, 0, 0);
+}
+
+void VG_(callHelper_1_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+
+   vg_assert(regparms_n <= 1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr1(cb, CCALL, 0, TempReg, t1);
+   uCCall(cb, f, 1, regparms_n, 0);
+}
+
+void VG_(callHelper_2_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt arg2,
+                         UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+   UInt t2 = newTemp(cb);
+
+   vg_assert(regparms_n <= 2);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
+   uLiteral(cb, arg2);
+   uInstr2(cb, CCALL, 0, TempReg, t1, TempReg, t2);
+   uCCall(cb, f, 2, regparms_n, 0);
+}
+
+void VG_(set_global_var)(UCodeBlock* cb, Addr globvar_ptr, UInt val)
+{
+   Int t_gv  = newTemp(cb);        
+   Int t_val = newTemp(cb);        
+
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_val);
+   uLiteral(cb, val);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_gv);
+   uLiteral(cb, globvar_ptr);
+   uInstr2(cb, STORE, 4, TempReg, t_val, TempReg, t_gv);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/coregrind/vg_kerneliface.h b/coregrind/vg_kerneliface.h
index bcc10f5..ede3049 100644
--- a/coregrind/vg_kerneliface.h
+++ b/coregrind/vg_kerneliface.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_KERNELIFACE_H
@@ -139,6 +139,40 @@
 #define VKI_MAP_PRIVATE    0x02            /* Changes are private.  */
 #define VKI_MAP_FIXED      0x10            /* Interpret addr exactly */
 
+/* Copied from linux-2.4.19/include/asm-i386/fcntl.h */
+
+#define VKI_O_RDONLY             00
+#define VKI_O_WRONLY             01
+#define VKI_O_RDWR               02
+#define VKI_O_CREAT            0100 /* not fcntl */
+#define VKI_O_EXCL             0200 /* not fcntl */
+#define VKI_O_TRUNC           01000 /* not fcntl */
+#define VKI_O_APPEND          02000
+#define VKI_O_NONBLOCK        04000
+#define VKI_O_SYNC           010000
+#define VKI_FASYNC           020000 /* fcntl, for BSD compatibility */
+#define VKI_O_DIRECT         040000 /* direct disk access hint */
+#define VKI_O_LARGEFILE     0100000
+#define VKI_O_DIRECTORY     0200000 /* must be a directory */
+#define VKI_O_NOFOLLOW      0400000 /* don't follow links */
+
+/* Copied from linux-2.4.19/include/linux/stat.h */
+
+#define VKI_S_IRWXU 00700
+#define VKI_S_IRUSR 00400
+#define VKI_S_IWUSR 00200
+#define VKI_S_IXUSR 00100
+
+#define VKI_S_IRWXG 00070
+#define VKI_S_IRGRP 00040
+#define VKI_S_IWGRP 00020
+#define VKI_S_IXGRP 00010
+
+#define VKI_S_IRWXO 00007
+#define VKI_S_IROTH 00004
+#define VKI_S_IWOTH 00002
+#define VKI_S_IXOTH 00001
+
 
 /* Copied from /usr/src/linux-2.4.9-13/include/asm/errno.h */
 
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index 994cdb7..5972dfa 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ALL THIS CODE RUNS ON THE SIMULATED CPU.
@@ -257,6 +257,12 @@
    return 0;
 }
 
+int pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate)
+{
+   *detachstate = attr->__detachstate;
+   return 0;
+}
+
 int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit)
 {
    static int moans = N_MOANS;
@@ -1044,6 +1050,7 @@
 void __my_pthread_testcancel(void)
 {
    int res;
+   ensure_valgrind("__my_pthread_testcancel");
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__TESTCANCEL,
                            0, 0, 0, 0);
@@ -1178,7 +1185,7 @@
       if (n_now != n_orig) break;
 
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
+      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -1381,13 +1388,14 @@
 /* Relies on assumption that initial private data is NULL.  This
    should be fixed somehow. */
 
-/* The allowable keys (indices) (all 2 of them). 
+/* The allowable keys (indices) (all 3 of them). 
    From sysdeps/pthread/bits/libc-tsd.h
 */
-#define N_LIBC_TSD_EXTRA_KEYS 1
+#define N_LIBC_TSD_EXTRA_KEYS 0
 
 enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
                         _LIBC_TSD_KEY_DL_ERROR,
+                        _LIBC_TSD_KEY_RPC_VARS,
                         _LIBC_TSD_KEY_N };
 
 /* Auto-initialising subsystem.  libc_specifics_inited is set 
@@ -1877,6 +1885,10 @@
 }
 
 
+pid_t __vfork(void)
+{
+   return __fork();
+}
 
 
 /* ---------------------------------------------------------------------
@@ -1965,7 +1977,7 @@
    Basic idea is: modify the timeout parameter to select so that it
    returns immediately.  Poll like this until select returns non-zero,
    indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 20 milliseconds, which
+   Space out the polls with nanosleeps of say 11 milliseconds, which
    is required to be nonblocking; this allows other threads to run.  
 
    Assumes:
@@ -2083,7 +2095,7 @@
       /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */
+      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       res = my_do_syscall2(__NR_nanosleep, 
@@ -2193,7 +2205,7 @@
       /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */
+      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -2810,6 +2822,7 @@
 weak_alias (__pread64, pread64)
 weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
+weak_alias(__vfork, vfork)
 
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
diff --git a/coregrind/vg_libpthread_unimp.c b/coregrind/vg_libpthread_unimp.c
index f413887..f3938ec 100644
--- a/coregrind/vg_libpthread_unimp.c
+++ b/coregrind/vg_libpthread_unimp.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ---------------------------------------------------------------------
@@ -82,7 +82,7 @@
 //void longjmp ( void )  { unimp("longjmp"); }
 //void pthread_atfork ( void )  { unimp("pthread_atfork"); }
 //void pthread_attr_destroy ( void )  { unimp("pthread_attr_destroy"); }
-void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
+//void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
 void pthread_attr_getinheritsched ( void )  { unimp("pthread_attr_getinheritsched"); }
 //void pthread_attr_getschedparam ( void )  { unimp("pthread_attr_getschedparam"); }
 //void pthread_attr_getschedpolicy ( void )  { unimp("pthread_attr_getschedpolicy"); }
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 5cce13d..582b652 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -26,12 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /* ---------------------------------------------------------------------
    Compute offsets into baseBlock.  See comments in vg_include.h.
@@ -62,6 +60,7 @@
 Int VGOFF_(sh_esi) = INVALID_OFFSET;
 Int VGOFF_(sh_edi) = INVALID_OFFSET;
 Int VGOFF_(sh_eflags) = INVALID_OFFSET;
+
 Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_div_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET;
@@ -92,25 +91,25 @@
 Int VGOFF_(helper_SAHF) = INVALID_OFFSET;
 Int VGOFF_(helper_DAS) = INVALID_OFFSET;
 Int VGOFF_(helper_DAA) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET;
 Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET;
-Int VGOFF_(fpu_write_check) = INVALID_OFFSET;
-Int VGOFF_(fpu_read_check) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_non_mem_instr) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_mem_instr)     = INVALID_OFFSET;
+
+/* MAX_NONCOMPACT_HELPERS can be increased easily.  If MAX_COMPACT_HELPERS is
+ * increased too much, they won't really be compact any more... */
+#define  MAX_COMPACT_HELPERS     8
+#define  MAX_NONCOMPACT_HELPERS  8 
+
+UInt VG_(n_compact_helpers)    = 0;
+UInt VG_(n_noncompact_helpers) = 0;
+
+Addr VG_(compact_helper_addrs)  [MAX_COMPACT_HELPERS];
+Int  VG_(compact_helper_offsets)[MAX_COMPACT_HELPERS];
+Addr VG_(noncompact_helper_addrs)  [MAX_NONCOMPACT_HELPERS];
+Int  VG_(noncompact_helper_offsets)[MAX_NONCOMPACT_HELPERS];
 
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
 
+
 /* Words. */
 static Int baB_off = 0;
 
@@ -133,6 +132,41 @@
    return off;
 }
 
+/* Registers a function in compact_helper_addrs;  compact_helper_offsets is
+ * filled in later.
+ */
+void VG_(register_compact_helper)(Addr a)
+{
+   if (MAX_COMPACT_HELPERS <= VG_(n_compact_helpers)) {
+      VG_(printf)("Can only register %d compact helpers\n", 
+                  MAX_COMPACT_HELPERS);
+      VG_(panic)("Too many compact helpers registered");
+   }
+   VG_(compact_helper_addrs)[VG_(n_compact_helpers)] = a;
+   VG_(n_compact_helpers)++;
+}
+
+/* Registers a function in noncompact_helper_addrs;  noncompact_helper_offsets
+ * is filled in later.
+ */
+void VG_(register_noncompact_helper)(Addr a)
+{
+   if (MAX_NONCOMPACT_HELPERS <= VG_(n_noncompact_helpers)) {
+      VG_(printf)("Can only register %d non-compact helpers\n", 
+                  MAX_NONCOMPACT_HELPERS);
+      VG_(printf)("Try increasing MAX_NON_COMPACT_HELPERS\n");
+      VG_(panic)("Too many non-compact helpers registered");
+   }
+   VG_(noncompact_helper_addrs)[VG_(n_noncompact_helpers)] = a;
+   VG_(n_noncompact_helpers)++;
+}
+
+/* Allocate offsets in baseBlock for the skin helpers */
+static void assign_helpers_in_baseBlock(UInt n, Int offsets[], Addr addrs[])
+{
+   Int i;
+   for (i = 0; i < n; i++) offsets[i] = alloc_BaB_1_set( addrs[i] );
+}
 
 /* Here we assign actual offsets.  It's important to get the most
    popular referents within 128 bytes of the start, so we can take
@@ -143,8 +177,6 @@
 
 static void vg_init_baseBlock ( void )
 {
-   baB_off = 0;
-
    /* Those with offsets under 128 are carefully chosen. */
 
    /* WORD offsets in this column */
@@ -158,82 +190,42 @@
    /* 7   */ VGOFF_(m_edi)     = alloc_BaB(1);
    /* 8   */ VGOFF_(m_eflags)  = alloc_BaB(1);
 
-   /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
-   /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
-   /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
-   /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
-   /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
-   /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
-   /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
-   /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
-   /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   if (VG_(needs).shadow_regs) {
+      /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
+      /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
+      /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
+      /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
+      /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
+      /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
+      /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
+      /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
+      /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   }
 
-   /* 17a */ 
-   VGOFF_(cachesim_log_non_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_non_mem_instr) );
-   /* 17b */ 
-   VGOFF_(cachesim_log_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_mem_instr) );
+   /* 9,10,11 or 18,19,20... depends on whether shadow regs are used
+    * and on the number of compact helpers registered */ 
 
-   /* 18  */ 
-   VGOFF_(helper_value_check4_fail) 
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) );
-   /* 19 */
-   VGOFF_(helper_value_check0_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) );
+   /* (9 or 18) + n_compact_helpers  */
+   /* Register VG_(handle_esp_assignment) if needed. */
+   if (VG_(track_events).new_mem_stack_aligned || 
+       VG_(track_events).die_mem_stack_aligned) 
+      VG_(register_compact_helper)( (Addr) & VG_(handle_esp_assignment) );
 
-   /* 20  */
-   VGOFF_(helperc_STOREV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) );
-   /* 21  */
-   VGOFF_(helperc_STOREV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) );
+   /* Allocate slots for compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_compact_helpers), 
+                               VG_(compact_helper_offsets), 
+                               VG_(compact_helper_addrs));
 
-   /* 22  */
-   VGOFF_(helperc_LOADV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) );
-   /* 23  */
-   VGOFF_(helperc_LOADV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) );
-
-   /* 24  */
-   VGOFF_(handle_esp_assignment)
-      = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) );
-
-   /* 25 */
+   /* (9/10 or 18/19) + n_compact_helpers */
    VGOFF_(m_eip) = alloc_BaB(1);
 
    /* There are currently 24 spill slots */
-   /* 26 .. 49  This overlaps the magic boundary at >= 32 words, but
-      most spills are to low numbered spill slots, so the ones above
-      the boundary don't see much action. */
+   /* (11+/20+ .. 32+/43+) + n_compact_helpers.  This can overlap the magic
+    * boundary at >= 32 words, but most spills are to low numbered spill
+    * slots, so the ones above the boundary don't see much action. */
    VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS);
 
-   /* These two pushed beyond the boundary because 2-byte transactions
-      are rare. */
-   /* 50  */
-   VGOFF_(helperc_STOREV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) );
-   /* 51  */
-   VGOFF_(helperc_LOADV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) );
-
-   /* 52  */
-   VGOFF_(fpu_write_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) );
-   /* 53  */
-   VGOFF_(fpu_read_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) );
-
-   /* Actually I don't think these two are ever used. */
-   /* 54  */ 
-   VGOFF_(helper_value_check2_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) );
-   /* 55  */ 
-   VGOFF_(helper_value_check1_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) );
-
-   /* I gave up counting at this point.  Since they're way above the
+   /* I gave up counting at this point.  Since they're above the
       short-amode-boundary, there's no point. */
 
    VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
@@ -303,6 +295,31 @@
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAS) );
    VGOFF_(helper_DAA)
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAA) );
+
+   /* Allocate slots for non-compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_noncompact_helpers), 
+                               VG_(noncompact_helper_offsets), 
+                               VG_(noncompact_helper_addrs));
+}
+
+static void vg_init_shadow_regs ( void )
+{
+   if (VG_(needs).shadow_regs) {
+      UInt eflags;
+   
+      SK_(written_shadow_regs_values) ( & VG_(written_shadow_reg), & eflags );
+      VG_(baseBlock)[VGOFF_(sh_esp)]    = 
+      VG_(baseBlock)[VGOFF_(sh_ebp)]    =
+      VG_(baseBlock)[VGOFF_(sh_eax)]    =
+      VG_(baseBlock)[VGOFF_(sh_ecx)]    =
+      VG_(baseBlock)[VGOFF_(sh_edx)]    =
+      VG_(baseBlock)[VGOFF_(sh_ebx)]    =
+      VG_(baseBlock)[VGOFF_(sh_esi)]    =
+      VG_(baseBlock)[VGOFF_(sh_edi)]    = VG_(written_shadow_reg);
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = eflags;
+
+   } else
+      VG_(written_shadow_reg) = VG_UNUSED_SHADOW_REG_VALUE;
 }
 
 
@@ -330,15 +347,17 @@
 /* 64-bit counter for the number of bbs to go before a debug exit. */
 ULong VG_(bbs_to_go);
 
-/* Produce debugging output? */
-Bool VG_(disassemble) = False;
-
 /* The current LRU epoch. */
 UInt VG_(current_epoch) = 0;
 
 /* This is the ThreadId of the last thread the scheduler ran. */
 ThreadId VG_(last_run_tid) = 0;
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+UInt VG_(exitcode) = 0;
+
 
 /* ---------------------------------------------------------------------
    Counters, for informational purposes only.
@@ -396,46 +415,111 @@
 
 
 /* ---------------------------------------------------------------------
+   Skin data structure initialisation
+   ------------------------------------------------------------------ */
+
+/* Init with default values. */
+VgNeeds VG_(needs) = {
+   .name                    = NULL,
+   .description             = NULL,
+
+   .core_errors             = False,
+   .skin_errors             = False,
+   .run_libc_freeres        = False,
+
+   .sizeof_shadow_block     = 0,
+
+   .basic_block_discards    = False,
+   .shadow_regs             = False,
+   .command_line_options    = False,
+   .client_requests         = False,
+   .extended_UCode          = False,
+   .syscall_wrapper         = False,
+   .alternative_free        = False,
+   .sanity_checks           = False,
+};
+
+VgTrackEvents VG_(track_events) = {
+   /* Memory events */
+   .new_mem_startup       = NULL,
+   .new_mem_heap          = NULL,
+   .new_mem_stack         = NULL,
+   .new_mem_stack_aligned = NULL,
+   .new_mem_stack_signal  = NULL,
+   .new_mem_brk           = NULL,
+   .new_mem_mmap          = NULL,
+
+   .copy_mem_heap         = NULL,
+   .change_mem_mprotect   = NULL,
+
+   .ban_mem_heap          = NULL,
+   .ban_mem_stack         = NULL,
+
+   .die_mem_heap          = NULL,
+   .die_mem_stack         = NULL,
+   .die_mem_stack_aligned = NULL,
+   .die_mem_stack_signal  = NULL,
+   .die_mem_brk           = NULL,
+   .die_mem_munmap        = NULL,
+
+   .bad_free              = NULL,
+   .mismatched_free       = NULL,
+
+   .pre_mem_read          = NULL,
+   .pre_mem_read_asciiz   = NULL,
+   .pre_mem_write         = NULL,
+   .post_mem_write        = NULL,
+
+   /* Mutex events */
+   .post_mutex_lock       = NULL,
+   .post_mutex_unlock     = NULL,
+};
+
+static void sanity_check_needs ( void )
+{
+#define CHECK_NOT(var, value)                                     \
+   if ((var)==(value)) {                                          \
+      VG_(printf)("\n`%s' not initialised\n", VG__STRING(var));   \
+      VG_(skin_error)("Uninitialised needs field\n");             \
+   }
+   
+   CHECK_NOT(VG_(needs).name,        NULL);
+   CHECK_NOT(VG_(needs).description, NULL);
+
+#undef CHECK_NOT
+#undef INVALID_Bool
+}
+
+/* ---------------------------------------------------------------------
    Values derived from command-line options.
    ------------------------------------------------------------------ */
 
-Bool   VG_(clo_error_limit);
-Bool   VG_(clo_check_addrVs);
-Bool   VG_(clo_GDB_attach);
-Int    VG_(sanity_level);
-Int    VG_(clo_verbosity);
-Bool   VG_(clo_demangle);
-Bool   VG_(clo_leak_check);
-Bool   VG_(clo_show_reachable);
-Int    VG_(clo_leak_resolution);
-Bool   VG_(clo_sloppy_malloc);
-Int    VG_(clo_alignment);
-Bool   VG_(clo_partial_loads_ok);
-Bool   VG_(clo_trace_children);
-Int    VG_(clo_logfile_fd);
-Int    VG_(clo_freelist_vol);
-Bool   VG_(clo_workaround_gcc296_bugs);
-Int    VG_(clo_n_suppressions);
+/* Define, and set defaults. */
+Bool   VG_(clo_error_limit)    = True;
+Bool   VG_(clo_GDB_attach)     = False;
+Int    VG_(sanity_level)       = 1;
+Int    VG_(clo_verbosity)      = 1;
+Bool   VG_(clo_demangle)       = True;
+Bool   VG_(clo_sloppy_malloc)  = False;
+Int    VG_(clo_alignment)      = 4;
+Bool   VG_(clo_trace_children) = False;
+Int    VG_(clo_logfile_fd)     = 2;
+Int    VG_(clo_n_suppressions) = 0;
 Char*  VG_(clo_suppressions)[VG_CLO_MAX_SFILES];
-Bool   VG_(clo_single_step);
-Bool   VG_(clo_optimise);
-Bool   VG_(clo_instrument);
-Bool   VG_(clo_cleanup);
-Bool   VG_(clo_cachesim);
-cache_t VG_(clo_I1_cache);
-cache_t VG_(clo_D1_cache);
-cache_t VG_(clo_L2_cache);
-Int    VG_(clo_smc_check);
-Bool   VG_(clo_trace_syscalls);
-Bool   VG_(clo_trace_signals);
-Bool   VG_(clo_trace_symtab);
-Bool   VG_(clo_trace_malloc);
-Bool   VG_(clo_trace_sched);
-Int    VG_(clo_trace_pthread_level);
-ULong  VG_(clo_stop_after);
-Int    VG_(clo_dump_error);
-Int    VG_(clo_backtrace_size);
-Char*  VG_(clo_weird_hacks);
+Bool   VG_(clo_profile)        = False;
+Bool   VG_(clo_single_step)    = False;
+Bool   VG_(clo_optimise)       = True;
+UChar  VG_(clo_trace_codegen)  = 0; // 00000000b
+Bool   VG_(clo_trace_syscalls) = False;
+Bool   VG_(clo_trace_signals)  = False;
+Bool   VG_(clo_trace_symtab)   = False;
+Bool   VG_(clo_trace_malloc)   = False;
+Bool   VG_(clo_trace_sched)    = False;
+Int    VG_(clo_trace_pthread_level) = 0;
+ULong  VG_(clo_stop_after)     = 1000000000000LL;
+Int    VG_(clo_dump_error)     = 0;
+Int    VG_(clo_backtrace_size) = 4;
+Char*  VG_(clo_weird_hacks)    = NULL;
 
 /* This Bool is needed by wrappers in vg_clientmalloc.c to decide how
    to behave.  Initially we say False. */
@@ -454,12 +538,11 @@
    don't have to modify the original. */
 static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN];
 
-
 /* ---------------------------------------------------------------------
    Processing of command-line options.
    ------------------------------------------------------------------ */
 
-static void bad_option ( Char* opt )
+void VG_(bad_option) ( Char* opt )
 {
    VG_(shutdown_logging)();
    VG_(clo_logfile_fd) = 2; /* stderr */
@@ -487,91 +570,85 @@
    config_error("couldn't find client's argc/argc/envp");
 }   
 
-static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+static void usage ( void )
 {
-   int   i1, i2, i3;
-   int   i;
-   char *opt = VG_(strdup)(VG_AR_PRIVATE, orig_opt);
+   Char* usage1 = 
+"usage: valgrind [options] prog-and-args\n"
+"\n"
+"  core user options, with defaults in [ ], are:\n"
+"    --help                    show this message\n"
+"    --version                 show version\n"
+"    --skin=<name>             main task (skin to use) [Valgrind]\n"
+"    -q --quiet                run silently; only print error msgs\n"
+"    -v --verbose              be more verbose, incl counts of errors\n"
+"    --gdb-attach=no|yes       start GDB when errors detected? [no]\n"
+"    --demangle=no|yes         automatically demangle C++ names? [yes]\n"
+"    --num-callers=<number>    show <num> callers in stack traces [4]\n"
+"    --error-limit=no|yes      stop showing new errors if too many? [yes]\n"
+"    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]\n"
+"    --alignment=<number>      set minimum alignment of allocations [4]\n"
+"    --trace-children=no|yes   Valgrind-ise child processes? [no]\n"
+"    --logfile-fd=<number>     file descriptor for messages [2=stderr]\n"
+"    --suppressions=<filename> suppress errors described in\n"
+"                              suppressions file <filename>\n"
+"    --weird-hacks=hack1,hack2,...  [no hacks selected]\n"
+"         recognised hacks are: ioctl-VTIME truncate-writes\n"
+"\n"
+"  %s skin user options:\n";
 
-   i = i1 = opt_len;
 
-   /* Option looks like "--I1=65536,2,64".
-    * Find commas, replace with NULs to make three independent 
-    * strings, then extract numbers.  Yuck. */
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i2 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i3 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if ('\0' != opt[i]) goto bad;
+   Char* usage2 = 
+"\n"
+"  core options for debugging Valgrind itself are:\n"
+"    --sanity-level=<number>   level of sanity checking to do [1]\n"
+"    --single-step=no|yes      translate each instr separately? [no]\n"
+"    --optimise=no|yes         improve intermediate code? [yes]\n"
+"    --profile=no|yes          profile? (skin must be built for it) [no]\n"
+"    --trace-codegen=<XXXXX>   show generated code? (X = 0|1) [00000]\n"
+"    --trace-syscalls=no|yes   show all system calls? [no]\n"
+"    --trace-signals=no|yes    show signal handling details? [no]\n"
+"    --trace-symtab=no|yes     show symbol table details? [no]\n"
+"    --trace-malloc=no|yes     show client malloc details? [no]\n"
+"    --trace-sched=no|yes      show thread scheduler details? [no]\n"
+"    --trace-pthread=none|some|all  show pthread event details? [no]\n"
+"    --stop-after=<number>     switch to real CPU after executing\n"
+"                              <number> basic blocks [infinity]\n"
+"    --dump-error=<number>     show translation for basic block\n"
+"                              associated with <number>'th\n"
+"                              error context [0=don't show any]\n"
+"\n"
+"  Extra options are read from env variable $VALGRIND_OPTS\n"
+"\n"
+"  Valgrind is Copyright (C) 2000-2002 Julian Seward\n"
+"  and licensed under the GNU General Public License, version 2.\n"
+"  Bug reports, feedback, admiration, abuse, etc, to: %s.\n"
+"\n";
 
-   cache->size      = (Int)VG_(atoll)(opt + i1);
-   cache->assoc     = (Int)VG_(atoll)(opt + i2);
-   cache->line_size = (Int)VG_(atoll)(opt + i3);
+   VG_(printf)(usage1, VG_(needs).name);
+   /* Pass the skin's usage string through "%s" rather than using it as a format string, to avoid format-string vulnerabilities */
+   if (VG_(needs).command_line_options)
+      VG_(printf)("%s", SK_(usage)());
+   else
+      VG_(printf)("    (none)\n");
+   VG_(printf)(usage2, VG_EMAIL_ADDR);
 
-   VG_(free)(VG_AR_PRIVATE, opt);
-   return;
-
-  bad:    
-   bad_option(orig_opt);
+   VG_(shutdown_logging)();
+   VG_(clo_logfile_fd) = 2; /* stderr */
+   VG_(exit)(1);
 }
 
 static void process_cmd_line_options ( void )
 {
-   UChar* argv[M_VG_CMDLINE_OPTS];
-   UInt   argc;
-   UChar* p;
-   UChar* str;
-   Int    i, eventually_logfile_fd, ctr;
+   Char* argv[M_VG_CMDLINE_OPTS];
+   UInt  argc;
+   Char* p;
+   Char* str;
+   Int   i, eventually_logfile_fd, ctr;
 
 #  define ISSPACE(cc)      ((cc) == ' ' || (cc) == '\t' || (cc) == '\n')
 #  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
 #  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
 
-   /* Set defaults. */
-   VG_(clo_error_limit)      = True;
-   VG_(clo_check_addrVs)     = True;
-   VG_(clo_GDB_attach)       = False;
-   VG_(sanity_level)         = 1;
-   VG_(clo_verbosity)        = 1;
-   VG_(clo_demangle)         = True;
-   VG_(clo_leak_check)       = False;
-   VG_(clo_show_reachable)   = False;
-   VG_(clo_leak_resolution)  = 2;
-   VG_(clo_sloppy_malloc)    = False;
-   VG_(clo_alignment)        = 4;
-   VG_(clo_partial_loads_ok) = True;
-   VG_(clo_trace_children)   = False;
-   VG_(clo_logfile_fd)       = 2; /* stderr */
-   VG_(clo_freelist_vol)     = 1000000;
-   VG_(clo_workaround_gcc296_bugs) = False;
-   VG_(clo_n_suppressions)   = 0;
-   VG_(clo_single_step)      = False;
-   VG_(clo_optimise)         = True;
-   VG_(clo_instrument)       = True;
-   VG_(clo_cachesim)         = False;
-   VG_(clo_I1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_D1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_L2_cache)         = UNDEFINED_CACHE;
-   VG_(clo_cleanup)          = True;
-   VG_(clo_smc_check)        = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE;
-   VG_(clo_trace_syscalls)   = False;
-   VG_(clo_trace_signals)    = False;
-   VG_(clo_trace_symtab)     = False;
-   VG_(clo_trace_malloc)     = False;
-   VG_(clo_trace_sched)      = False;
-   VG_(clo_trace_pthread_level) = 0;
-   VG_(clo_stop_after)       = 1000000000000LL;
-   VG_(clo_dump_error)       = 0;
-   VG_(clo_backtrace_size)   = 4;
-   VG_(clo_weird_hacks)      = NULL;
-
    eventually_logfile_fd = VG_(clo_logfile_fd);
 
    /* Once logging is started, we can safely send messages pertaining
@@ -603,7 +680,10 @@
        if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
                                   VG_STARTUP_STACK_BASE_3 )) {
           sp = (UInt*)VG_STARTUP_STACK_BASE_3;
- 
+       } else 
+       if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
+                                  VG_STARTUP_STACK_BASE_4 )) {
+          sp = (UInt*)VG_STARTUP_STACK_BASE_4;
        } else {
           args_grok_error(
              "startup %esp is not near any VG_STARTUP_STACK_BASE_*\n   "
@@ -723,7 +803,7 @@
 
    for (i = 0; i < argc; i++) {
 
-      if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
+      if      (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
          VG_(clo_verbosity)++;
       else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet"))
          VG_(clo_verbosity)--;
@@ -733,11 +813,6 @@
       else if (STREQ(argv[i], "--error-limit=no"))
          VG_(clo_error_limit) = False;
 
-      else if (STREQ(argv[i], "--check-addrVs=yes"))
-         VG_(clo_check_addrVs) = True;
-      else if (STREQ(argv[i], "--check-addrVs=no"))
-         VG_(clo_check_addrVs) = False;
-
       else if (STREQ(argv[i], "--gdb-attach=yes"))
          VG_(clo_GDB_attach) = True;
       else if (STREQ(argv[i], "--gdb-attach=no"))
@@ -748,28 +823,6 @@
       else if (STREQ(argv[i], "--demangle=no"))
          VG_(clo_demangle) = False;
 
-      else if (STREQ(argv[i], "--partial-loads-ok=yes"))
-         VG_(clo_partial_loads_ok) = True;
-      else if (STREQ(argv[i], "--partial-loads-ok=no"))
-         VG_(clo_partial_loads_ok) = False;
-
-      else if (STREQ(argv[i], "--leak-check=yes"))
-         VG_(clo_leak_check) = True;
-      else if (STREQ(argv[i], "--leak-check=no"))
-         VG_(clo_leak_check) = False;
-
-      else if (STREQ(argv[i], "--show-reachable=yes"))
-         VG_(clo_show_reachable) = True;
-      else if (STREQ(argv[i], "--show-reachable=no"))
-         VG_(clo_show_reachable) = False;
-
-      else if (STREQ(argv[i], "--leak-resolution=low"))
-         VG_(clo_leak_resolution) = 2;
-      else if (STREQ(argv[i], "--leak-resolution=med"))
-         VG_(clo_leak_resolution) = 4;
-      else if (STREQ(argv[i], "--leak-resolution=high"))
-         VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE;
-
       else if (STREQ(argv[i], "--sloppy-malloc=yes"))
          VG_(clo_sloppy_malloc) = True;
       else if (STREQ(argv[i], "--sloppy-malloc=no"))
@@ -783,32 +836,27 @@
       else if (STREQ(argv[i], "--trace-children=no"))
          VG_(clo_trace_children) = False;
 
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes"))
-         VG_(clo_workaround_gcc296_bugs) = True;
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=no"))
-         VG_(clo_workaround_gcc296_bugs) = False;
-
       else if (STREQN(15, argv[i], "--sanity-level="))
          VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]);
 
       else if (STREQN(13, argv[i], "--logfile-fd="))
          eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]);
 
-      else if (STREQN(15, argv[i], "--freelist-vol=")) {
-         VG_(clo_freelist_vol) = (Int)VG_(atoll)(&argv[i][15]);
-         if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2;
-      }
-
       else if (STREQN(15, argv[i], "--suppressions=")) {
          if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) {
-            VG_(message)(Vg_UserMsg, "Too many logfiles specified.");
+            VG_(message)(Vg_UserMsg, "Too many suppression files specified.");
             VG_(message)(Vg_UserMsg, 
                          "Increase VG_CLO_MAX_SFILES and recompile.");
-            bad_option(argv[i]);
+            VG_(bad_option)(argv[i]);
          }
          VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15];
          VG_(clo_n_suppressions)++;
       }
+      else if (STREQ(argv[i], "--profile=yes"))
+         VG_(clo_profile) = True;
+      else if (STREQ(argv[i], "--profile=no"))
+         VG_(clo_profile) = False;
+
       else if (STREQ(argv[i], "--single-step=yes"))
          VG_(clo_single_step) = True;
       else if (STREQ(argv[i], "--single-step=no"))
@@ -819,35 +867,26 @@
       else if (STREQ(argv[i], "--optimise=no"))
          VG_(clo_optimise) = False;
 
-      else if (STREQ(argv[i], "--instrument=yes"))
-         VG_(clo_instrument) = True;
-      else if (STREQ(argv[i], "--instrument=no"))
-         VG_(clo_instrument) = False;
-
-      else if (STREQ(argv[i], "--cleanup=yes"))
-         VG_(clo_cleanup) = True;
-      else if (STREQ(argv[i], "--cleanup=no"))
-         VG_(clo_cleanup) = False;
-
-      else if (STREQ(argv[i], "--cachesim=yes"))
-         VG_(clo_cachesim) = True;     
-      else if (STREQ(argv[i], "--cachesim=no"))
-         VG_(clo_cachesim) = False;
-
-      /* 5 is length of "--I1=" */
-      else if (0 == VG_(strncmp)(argv[i], "--I1=",    5))
-         parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--D1=",    5))
-         parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--L2=",    5))
-         parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5);
-
-      else if (STREQ(argv[i], "--smc-check=none"))
-         VG_(clo_smc_check) = VG_CLO_SMC_NONE;
-      else if (STREQ(argv[i], "--smc-check=some"))
-         VG_(clo_smc_check) = VG_CLO_SMC_SOME;
-      else if (STREQ(argv[i], "--smc-check=all"))
-         VG_(clo_smc_check) = VG_CLO_SMC_ALL;
+      /* "vwxyz" --> 000zyxwv (binary) */
+      else if (STREQN(16, argv[i], "--trace-codegen=")) {
+         Int j;
+         char* opt = & argv[i][16];
+   
+         if (5 != VG_(strlen)(opt)) {
+            VG_(message)(Vg_UserMsg, 
+                         "--trace-codegen argument must have 5 digits");
+            VG_(bad_option)(argv[i]);
+         }
+         for (j = 0; j < 5; j++) {
+            if      ('0' == opt[j]) { /* do nothing */ }
+            else if ('1' == opt[j]) VG_(clo_trace_codegen) |= (1 << j);
+            else {
+               VG_(message)(Vg_UserMsg, "--trace-codegen argument can only "
+                                        "contain 0s and 1s");
+               VG_(bad_option)(argv[i]);
+            }
+         }
+      }
 
       else if (STREQ(argv[i], "--trace-syscalls=yes"))
          VG_(clo_trace_syscalls) = True;
@@ -899,8 +938,13 @@
             VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE;
       }
 
+      else if (VG_(needs).command_line_options) {
+         Bool ok = SK_(process_cmd_line_option)(argv[i]);
+         if (!ok)
+            usage();
+      }
       else
-         bad_option(argv[i]);
+         usage();
    }
 
 #  undef ISSPACE
@@ -917,7 +961,7 @@
       VG_(message)(Vg_UserMsg, 
          "Invalid --alignment= setting.  "
          "Should be a power of 2, >= 4, <= 4096.");
-      bad_option("--alignment");
+      VG_(bad_option)("--alignment");
    }
 
    if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) {
@@ -926,26 +970,14 @@
          "--gdb-attach=yes conflicts with --trace-children=yes");
       VG_(message)(Vg_UserMsg, 
          "Please choose one or the other, but not both.");
-      bad_option("--gdb-attach=yes and --trace-children=yes");
+      VG_(bad_option)("--gdb-attach=yes and --trace-children=yes");
    }
 
    VG_(clo_logfile_fd) = eventually_logfile_fd;
 
-   /* Don't do memory checking if simulating the cache. */
-   if (VG_(clo_cachesim)) {
-       VG_(clo_instrument) = False;
-   }
-
    if (VG_(clo_verbosity > 0)) {
-      if (VG_(clo_cachesim)) {
-         VG_(message)(Vg_UserMsg, 
-            "cachegrind-%s, an I1/D1/L2 cache profiler for x86 GNU/Linux.",
-            VERSION);
-      } else {
-         VG_(message)(Vg_UserMsg, 
-            "valgrind-%s, a memory error detector for x86 GNU/Linux.",
-            VERSION);
-      }
+      VG_(message)(Vg_UserMsg, "%s-%s, %s for x86 GNU/Linux.",
+         VG_(needs).name, VERSION, VG_(needs).description);
    }
 
    if (VG_(clo_verbosity > 0))
@@ -958,12 +990,12 @@
       }
    }
 
-   if (VG_(clo_n_suppressions) == 0 && !VG_(clo_cachesim)) {
+   if (VG_(clo_n_suppressions) == 0 && 
+       (VG_(needs).core_errors || VG_(needs).skin_errors)) {
       config_error("No error-suppression files were specified.");
    }
 }
 
-
 /* ---------------------------------------------------------------------
    Copying to/from m_state_static.
    ------------------------------------------------------------------ */
@@ -1015,11 +1047,40 @@
          = VG_(m_state_static)[40/4 + i];
 }
 
+Addr VG_(get_stack_pointer) ( void )
+{
+   return VG_(baseBlock)[VGOFF_(m_esp)];
+}
+
+/* Some random tests needed for leak checking */
+
+Bool VG_(within_stack)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(stack)))
+       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack)))
+      return True;
+   else
+      return False;
+}
+
+Bool VG_(within_m_state_static)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(m_state_static)))
+       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static)))
+      return True;
+   else
+      return False;
+}
 
 /* ---------------------------------------------------------------------
    Show accumulated counts.
    ------------------------------------------------------------------ */
 
+static __inline__ Int safe_idiv(Int a, Int b)
+{
+   return (b == 0 ? 0 : a / b);
+}
+
 static void vg_show_counts ( void )
 {
    VG_(message)(Vg_DebugMsg,
@@ -1027,13 +1088,17 @@
 		VG_(current_epoch),
                 VG_(number_of_lrus) );
    VG_(message)(Vg_DebugMsg,
-                "translate: new %d (%d -> %d), discard %d (%d -> %d).",
+                "translate: new     %d (%d -> %d; ratio %d:10)",
                 VG_(overall_in_count),
                 VG_(overall_in_osize),
                 VG_(overall_in_tsize),
+                safe_idiv(10*VG_(overall_in_tsize), VG_(overall_in_osize)));
+   VG_(message)(Vg_DebugMsg,
+                "           discard %d (%d -> %d; ratio %d:10).",
                 VG_(overall_out_count),
                 VG_(overall_out_osize),
-                VG_(overall_out_tsize) );
+                VG_(overall_out_tsize),
+                safe_idiv(10*VG_(overall_out_tsize), VG_(overall_out_osize)));
    VG_(message)(Vg_DebugMsg,
       " dispatch: %lu basic blocks, %d/%d sched events, %d tt_fast misses.", 
       VG_(bbs_done), VG_(num_scheduling_events_MAJOR), 
@@ -1050,6 +1115,7 @@
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
                 VG_(sanity_slow_count) );
+   VG_(print_ccall_stats)();
 }
 
 
@@ -1072,21 +1138,32 @@
       VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321;
    }
 
-   /* Set up baseBlock offsets and copy the saved machine's state into
-      it. */
+   /* Setup stuff that depends on the skin.  Must be before:
+      - vg_init_baseBlock(): to register helpers
+      - process_cmd_line_options(): to register skin name and description,
+        and turn on/off 'command_line_options' need
+      - init_memory() (to setup memory event trackers).
+    */
+   SK_(pre_clo_init) ( & VG_(needs), & VG_(track_events) );
+   sanity_check_needs();
+
+   /* Set up baseBlock offsets and copy the saved machine's state into it. */
    vg_init_baseBlock();
    VG_(copy_m_state_static_to_baseBlock)();
+   vg_init_shadow_regs();
 
    /* Process Valgrind's command-line opts (from env var VG_OPTS). */
    process_cmd_line_options();
 
    /* Hook to delay things long enough so we can get the pid and
       attach GDB in another shell. */
-   if (0) { 
+#if 0
+   { 
       Int p, q;
       for (p = 0; p < 50000; p++)
          for (q = 0; q < 50000; q++) ;
    }
+#endif
 
    /* Initialise the scheduler, and copy the client's state from
       baseBlock into VG_(threads)[1].  This has to come before signal
@@ -1098,31 +1175,34 @@
    VG_(sigstartup_actions)();
 
    /* Perhaps we're profiling Valgrind? */
-#  ifdef VG_PROFILE
-   VGP_(init_profiling)();
-#  endif
+   if (VG_(clo_profile))
+      VGP_(init_profiling)();
 
    /* Start calibration of our RDTSC-based clock. */
    VG_(start_rdtsc_calibration)();
 
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      VGP_PUSHCC(VgpInitAudit);
-      VGM_(init_memory_audit)();
-      VGP_POPCC;
-   }
+   /* Do this here just to give rdtsc calibration more time */
+   SK_(post_clo_init)();
 
-   VGP_PUSHCC(VgpReadSyms);
-   VG_(read_symbols)();
-   VGP_POPCC;
+   /* Must come after SK_(post_clo_init)() so memory handler accompaniments
+    * (eg. shadow memory) can be set up ok */
+   VGP_PUSHCC(VgpInitMem);
+   VG_(init_memory)();
+   VGP_POPCC(VgpInitMem);
+
+   /* Read the list of errors to suppress.  This should be found in
+      the file specified by vg_clo_suppressions. */
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
+      VG_(load_suppressions)();
 
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
 
-   /* This should come after init_memory_audit; otherwise the latter
-      carefully sets up the permissions maps to cover the anonymous
-      mmaps for the translation table and translation cache, which
-      wastes > 20M of virtual address space. */
+   /* This should come after VG_(init_memory)(); otherwise the
+      latter carefully sets up the permissions maps to cover the
+      anonymous mmaps for the translation table and translation cache,
+      which wastes > 20M of virtual address space. */
    VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
@@ -1132,26 +1212,18 @@
 
    /* Now it is safe for malloc et al in vg_clientmalloc.c to act
       instrumented-ly. */
-   VG_(running_on_simd_CPU) = True;
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
-   }
-
-   if (VG_(clo_cachesim)) 
-      VG_(init_cachesim)();
-
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
 
    VG_(bbs_to_go) = VG_(clo_stop_after);
 
+
    /* Run! */
+   VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
    src = VG_(scheduler)();
-   VGP_POPCC;
+   VGP_POPCC(VgpSched);
+   VG_(running_on_simd_CPU) = False;
 
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
@@ -1161,25 +1233,19 @@
         "Warning: pthread scheduler exited due to deadlock");
    }
 
-   if (VG_(clo_instrument)) {
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
       VG_(show_all_errors)();
-      VG_(clientmalloc_done)();
-      if (VG_(clo_verbosity) == 1) {
-         VG_(message)(Vg_UserMsg, 
-                      "For counts of detected errors, rerun with: -v");
-      }
-      if (VG_(clo_leak_check)) VG_(detect_memory_leaks)();
-   }
-   VG_(running_on_simd_CPU) = False;
 
-   if (VG_(clo_cachesim))
-      VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv));
+   SK_(fini)();
 
    VG_(do_sanity_checks)( True /*include expensive checks*/ );
 
    if (VG_(clo_verbosity) > 1)
       vg_show_counts();
 
+   if (VG_(clo_verbosity) > 2)
+      VG_(print_UInstr_histogram)();
+
    if (0) {
       VG_(message)(Vg_DebugMsg, "");
       VG_(message)(Vg_DebugMsg, 
@@ -1189,16 +1255,10 @@
       VG_(message)(Vg_DebugMsg, 
          "------ Valgrind's ExeContext management stats follow ------" );
       VG_(show_ExeContext_stats)();
-      VG_(message)(Vg_DebugMsg, 
-         "------ Valgrind's client block stats follow ---------------" );
-      VG_(show_client_block_stats)();
    }
  
-#  ifdef VG_PROFILE
-   VGP_(done_profiling)();
-#  endif
-
-   VG_(done_prof_mem)();
+   if (VG_(clo_profile))
+      VGP_(done_profiling)();
 
    VG_(shutdown_logging)();
 
@@ -1220,9 +1280,10 @@
                    && VG_(last_run_tid) < VG_N_THREADS);
          tst = & VG_(threads)[VG_(last_run_tid)];
          vg_assert(tst->status == VgTs_Runnable);
-         /* The thread's %EBX will hold the arg to exit(), so we just
-            do exit with that arg. */
-         VG_(exit)( tst->m_ebx );
+         /* The thread's %EBX at the time it did __NR_exit() will hold
+            the arg to __NR_exit(), so we just do __NR_exit() with
+            that arg. */
+         VG_(exit)( VG_(exitcode) );
          /* NOT ALIVE HERE! */
          VG_(panic)("entered the afterlife in vg_main() -- ExitSyscall");
          break; /* what the hell :) */
@@ -1267,6 +1328,10 @@
    tracing into child processes.  To make this work the build system
    also supplies a dummy file, "valgrinq.so". 
 
+   Also replace "vgskin_<foo>.so" with whitespace, for the same reason;
+   without it, child processes try to find valgrind.so symbols in the 
+   skin .so.
+
    Also look for $(libdir)/lib/valgrind in LD_LIBRARY_PATH and change
    it to $(libdir)/lib/valgrinq, so as to make our libpthread.so
    disappear.  
@@ -1274,20 +1339,22 @@
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str )
 {
-   Char* p_prel = NULL;
-   Char* p_path = NULL;
-   Int   what = 0;
+   Char* p_prel  = NULL;
+   Char* sk_prel = NULL;
+   Char* p_path  = NULL;
+   Int   what    = 0;
    if (ld_preload_str == NULL || ld_library_path_str == NULL)
       goto mutancy;
 
    /* VG_(printf)("%s %s\n", ld_preload_str, ld_library_path_str); */
 
    p_prel = VG_(strstr)(ld_preload_str, "valgrind.so");
+   sk_prel = VG_(strstr)(ld_preload_str, "vgskin_");
    p_path = VG_(strstr)(ld_library_path_str, VG_LIBDIR);
 
+   what = 1;
    if (p_prel == NULL) {
       /* perhaps already happened? */
-      what = 1;
       if (VG_(strstr)(ld_preload_str, "valgrinq.so") == NULL)
          goto mutancy;
       if (VG_(strstr)(ld_library_path_str, "lib/valgrinq") == NULL)
@@ -1296,10 +1363,30 @@
    }
 
    what = 2;
+   if (sk_prel == NULL) goto mutancy;
+
+   what = 3;
    if (p_path == NULL) goto mutancy;
 
+   what = 4;
+   {  
+      /* Blank from "vgskin_" back to prev. LD_PRELOAD entry, or start */
+      Char* p = sk_prel;
+      while (*p != ':' && p > ld_preload_str) { 
+         *p = ' ';
+         p--;
+      }
+      /* Blank from "vgskin_" to next LD_PRELOAD entry */
+      while (*p != ':' && *p != '\0') { 
+         *p = ' ';
+         p++;
+      }
+      if (*p == '\0') goto mutancy;    /* valgrind.so has disappeared?! */
+      *p = ' ';                        /* blank ending ':' */
+   }
+
    /* in LD_PRELOAD, turn valgrind.so into valgrinq.so. */
-   what = 3;
+   what = 5;
    if (p_prel[7] != 'd') goto mutancy;
    p_prel[7] = 'q';
 
@@ -1307,10 +1394,10 @@
       .../lib/valgrind .../lib/valgrinq, which doesn't exist,
       so that our own libpthread.so goes out of scope. */
    p_path += VG_(strlen)(VG_LIBDIR);
-   what = 4;
+   what = 6;
    if (p_path[0] != '/') goto mutancy;
    p_path++; /* step over / */
-   what = 5;
+   what = 7;
    if (p_path[7] != 'd') goto mutancy;
    p_path[7] = 'q';
    return;
@@ -1406,6 +1493,70 @@
 }
 
 
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* A fast sanity check -- suitable for calling circa once per
+   millisecond. */
+
+void VG_(do_sanity_checks) ( Bool force_expensive )
+{
+   Int          i;
+
+   if (VG_(sanity_level) < 1) return;
+
+   /* --- First do all the tests that we can do quickly. ---*/
+
+   VG_(sanity_fast_count)++;
+
+   /* Check that we haven't overrun our private stack. */
+   for (i = 0; i < 10; i++) {
+      vg_assert(VG_(stack)[i]
+                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
+      vg_assert(VG_(stack)[10000-1-i] 
+                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
+   }
+
+   /* Check stuff pertaining to the memory check system. */
+
+   /* Check that nobody has spuriously claimed that the first or
+      last 16 pages of memory have become accessible [...] */
+   if (VG_(needs).sanity_checks)
+      vg_assert(SK_(cheap_sanity_check)());
+
+   /* --- Now some more expensive checks. ---*/
+
+   /* Once every 25 times, check some more expensive stuff. */
+   if ( force_expensive
+     || VG_(sanity_level) > 1
+     || (VG_(sanity_level) == 1 && (VG_(sanity_fast_count) % 25) == 0)) {
+
+      VG_(sanity_slow_count)++;
+
+#     if 0
+      { void zzzmemscan(void); zzzmemscan(); }
+#     endif
+
+      if ((VG_(sanity_fast_count) % 250) == 0)
+         VG_(sanity_check_tc_tt)();
+
+      if (VG_(needs).sanity_checks) {
+          vg_assert(SK_(expensive_sanity_check)());
+      }
+      /* 
+      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
+      */
+   }
+
+   if (VG_(sanity_level) > 1) {
+      /* Check sanity of the low-level memory manager.  Note that bugs
+         in the client's code can cause this to fail, so we don't do
+         this check unless specially asked for.  And because it's
+         potentially very expensive. */
+      VG_(mallocSanityCheckAll)();
+   }
+}
 /*--------------------------------------------------------------------*/
 /*--- end                                                vg_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_malloc2.c b/coregrind/vg_malloc2.c
index 87f580d..92358c1 100644
--- a/coregrind/vg_malloc2.c
+++ b/coregrind/vg_malloc2.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -178,13 +178,14 @@
 /* It is important that this library is self-initialising, because it
    may get called very early on -- as a result of C++ static
    constructor initialisations -- before Valgrind itself is
-   initialised.  Hence vg_malloc() and vg_free() below always call
-   ensure_mm_init() to ensure things are correctly initialised.  */
+   initialised.  Hence VG_(arena_malloc)() and VG_(arena_free)() below always
+   call ensure_mm_init() to ensure things are correctly initialised.  */
 
 static
 void ensure_mm_init ( void )
 {
    static Bool init_done = False;
+
    if (init_done) return;
 
    /* Use a checked red zone size of 1 word for our internal stuff,
@@ -194,22 +195,28 @@
       which merely checks at the time of freeing that the red zone
       words are unchanged. */
 
-   arena_init ( &vg_arena[VG_AR_PRIVATE], "private ", 
+   arena_init ( &vg_arena[VG_AR_CORE],      "core    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_SYMTAB],  "symtab  ", 
+   arena_init ( &vg_arena[VG_AR_SKIN],      "skin    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_CLIENT],  "client  ",  
+   arena_init ( &vg_arena[VG_AR_SYMTAB],    "symtab  ", 
+                1, True, 262144 );
+
+   arena_init ( &vg_arena[VG_AR_JITTER],    "JITter  ", 
+                1, True, 8192 );
+
+   arena_init ( &vg_arena[VG_AR_CLIENT],    "client  ",  
                 VG_AR_CLIENT_REDZONE_SZW, False, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle",  
+   arena_init ( &vg_arena[VG_AR_DEMANGLE],  "demangle",  
                 4 /*paranoid*/, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_EXECTXT],  "exectxt ",  
+   arena_init ( &vg_arena[VG_AR_EXECTXT],   "exectxt ",  
                 1, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_ERRCTXT],  "errctxt ",  
+   arena_init ( &vg_arena[VG_AR_ERRORS],    "errors  ",  
                 1, True, 16384 );
 
    arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien",  
@@ -692,7 +699,7 @@
 
 
 /* Sanity check both the superblocks and the chains. */
-void VG_(mallocSanityCheckArena) ( ArenaId aid )
+static void mallocSanityCheckArena ( ArenaId aid )
 {
    Int         i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li;
    Int         blockctr_sb_free, listno, list_min_pszW, list_max_pszW;
@@ -703,7 +710,7 @@
    UInt        arena_bytes_on_loan;
    Arena*      a;
 
-#  define BOMB VG_(panic)("vg_mallocSanityCheckArena")
+#  define BOMB VG_(panic)("mallocSanityCheckArena")
 
    a = arenaId_to_ArenaP(aid);
    
@@ -722,15 +729,15 @@
          b     = &sb->payload_words[i];
          b_bszW = get_bszW_lo(b);
          if (!blockSane(a, b)) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "BAD\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        " BAD\n",
                          sb, i, b_bszW );
             BOMB;
          }
          thisFree = !is_inuse_bszW(b_bszW);
          if (thisFree && lastWasFree) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "UNMERGED FREES\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        "UNMERGED FREES\n",
                          sb, i, b_bszW );
             BOMB;
          }
@@ -741,7 +748,7 @@
          i += mk_plain_bszW(b_bszW);
       }
       if (i > sb->n_payload_words) {
-         VG_(printf)( "mallocSanityCheck: sb %p: last block "
+         VG_(printf)( "mallocSanityCheckArena: sb %p: last block "
                       "overshoots end\n", sb);
          BOMB;
       }
@@ -750,7 +757,7 @@
 
    if (arena_bytes_on_loan != a->bytes_on_loan) {
             VG_(printf)( 
-                    "mallocSanityCheck: a->bytes_on_loan %d, "
+                    "mallocSanityCheckArena: a->bytes_on_loan %d, "
                     "arena_bytes_on_loan %d: "
                     "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
       ppSuperblocks(a);
@@ -770,7 +777,7 @@
          b_prev = b;
          b = get_next_p(b);
          if (get_prev_p(b) != b_prev) {
-            VG_(printf)( "mallocSanityCheck: list %d at %p: "
+            VG_(printf)( "mallocSanityCheckArena: list %d at %p: "
                          "BAD LINKAGE\n", 
                          listno, b );
             BOMB;
@@ -778,7 +785,7 @@
          b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b)));
          if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) {
             VG_(printf)( 
-               "mallocSanityCheck: list %d at %p: "
+               "mallocSanityCheckArena: list %d at %p: "
                "WRONG CHAIN SIZE %d (%d, %d)\n", 
                listno, b, b_pszW, list_min_pszW, list_max_pszW );
             BOMB;
@@ -790,7 +797,7 @@
 
    if (blockctr_sb_free != blockctr_li) {
       VG_(printf)( 
-         "mallocSanityCheck: BLOCK COUNT MISMATCH "
+         "mallocSanityCheckArena: BLOCK COUNT MISMATCH "
          "(via sbs %d, via lists %d)\n",
          blockctr_sb_free, blockctr_li );
       ppSuperblocks(a);
@@ -813,7 +820,7 @@
 {
    Int i;
    for (i = 0; i < VG_N_ARENAS; i++)
-      VG_(mallocSanityCheckArena) ( i );
+      mallocSanityCheckArena ( i );
 }
 
 
@@ -828,6 +835,7 @@
    Superblock* sb;
    WordF*      b;
    Int         b_bszW;
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
    for (sb = a->sblocks; sb != NULL; sb = sb->next) {
@@ -845,10 +853,10 @@
 
 
 /*------------------------------------------------------------*/
-/*--- Externally-visible functions.                        ---*/
+/*--- Core-visible functions.                              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(malloc) ( ArenaId aid, Int req_pszB )
+void* VG_(arena_malloc) ( ArenaId aid, Int req_pszB )
 {
    Int         req_pszW, req_bszW, frag_bszW, b_bszW, lno;
    Superblock* new_sb;
@@ -943,15 +951,15 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
    return first_to_payload(a, b);
 }
 
  
-void VG_(free) ( ArenaId aid, void* ptr )
+void VG_(arena_free) ( ArenaId aid, void* ptr )
 {
    Superblock* sb;
    UInt*       sb_payl_firstw;
@@ -966,8 +974,11 @@
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
-   if (ptr == NULL) return;
-
+   if (ptr == NULL) {
+      VGP_POPCC(VgpMalloc);
+      return;
+   }
+      
    ch = payload_to_first(a, ptr);
 
 #  ifdef DEBUG_MALLOC
@@ -1026,10 +1037,10 @@
    }
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
 }
 
 
@@ -1065,13 +1076,15 @@
    .    .               .   .   .               .   .
 
 */
-void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
+void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
 {
    Int    req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW;
    Word   *base_b, *base_p, *align_p;
    UInt   saved_bytes_on_loan;
    Arena* a;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1091,7 +1104,7 @@
          break;
       default:
          VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", 
-                     a, req_pszB, req_alignB );
+                     a, req_alignB, req_pszB );
          VG_(panic)("vg_malloc_aligned");
          /*NOTREACHED*/
    }
@@ -1112,7 +1125,7 @@
    /* Payload ptr for the block we are going to split.  Note this
       changes a->bytes_on_loan; we save and restore it ourselves. */
    saved_bytes_on_loan = a->bytes_on_loan;
-   base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
+   base_p = VG_(arena_malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
    a->bytes_on_loan = saved_bytes_on_loan;
 
    /* Block ptr for the block we are going to split. */
@@ -1163,9 +1176,11 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
+   VGP_POPCC(VgpMalloc);
+
    return align_p;
 }
 
@@ -1174,25 +1189,34 @@
 /*--- Services layered on top of malloc/free.              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes )
+void* VG_(arena_calloc) ( ArenaId aid, Int nmemb, Int nbytes )
 {
    Int    i, size;
    UChar* p;
+
+   VGP_PUSHCC(VgpMalloc);
+
    size = nmemb * nbytes;
    vg_assert(size >= 0);
-   p = VG_(malloc) ( aid, size );
+   p = VG_(arena_malloc) ( aid, size );
    for (i = 0; i < size; i++) p[i] = 0;
+
+   VGP_POPCC(VgpMalloc);
+   
    return p;
 }
 
 
-void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB )
+void* VG_(arena_realloc) ( ArenaId aid, void* ptr, 
+                          Int req_alignB, Int req_pszB )
 {
    Arena* a;
    Int    old_bszW, old_pszW, old_pszB, i;
    UChar  *p_old, *p_new;
    UInt*  ch;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1208,19 +1232,60 @@
    old_pszW = bszW_to_pszW(a, old_bszW);
    old_pszB = old_pszW * VKI_BYTES_PER_WORD;
 
-   if (req_pszB <= old_pszB) return ptr;
+   if (req_pszB <= old_pszB) {
+      VGP_POPCC(VgpMalloc);
+      return ptr;
+   }
 
-   p_new = VG_(malloc) ( aid, req_pszB );
+   if (req_alignB == 4)
+      p_new = VG_(arena_malloc) ( aid, req_pszB );
+   else
+      p_new = VG_(arena_malloc_aligned) ( aid, req_alignB, req_pszB );
+
    p_old = (UChar*)ptr;
    for (i = 0; i < old_pszB; i++)
       p_new[i] = p_old[i];
 
-   VG_(free)(aid, p_old);
+   VG_(arena_free)(aid, p_old);
+
+   VGP_POPCC(VgpMalloc);
    return p_new;
 }
 
 
 /*------------------------------------------------------------*/
+/*--- Skin-visible functions.                              ---*/
+/*------------------------------------------------------------*/
+
+/* All just wrappers to avoid exposing arenas to skins */
+
+void* VG_(malloc) ( Int nbytes )
+{
+   return VG_(arena_malloc) ( VG_AR_SKIN, nbytes );
+}
+
+void  VG_(free) ( void* ptr )
+{
+   VG_(arena_free) ( VG_AR_SKIN, ptr );
+}
+
+void* VG_(calloc) ( Int nmemb, Int nbytes )
+{
+   return VG_(arena_calloc) ( VG_AR_SKIN, nmemb, nbytes );
+}
+
+void* VG_(realloc) ( void* ptr, Int size )
+{
+   return VG_(arena_realloc) ( VG_AR_SKIN, ptr, /*alignment*/4, size );
+}
+
+void* VG_(malloc_aligned) ( Int req_alignB, Int req_pszB )
+{
+   return VG_(arena_malloc_aligned) ( VG_AR_SKIN, req_alignB, req_pszB );
+}
+
+
+/*------------------------------------------------------------*/
 /*--- The original test driver machinery.                  ---*/
 /*------------------------------------------------------------*/
 
@@ -1243,7 +1308,7 @@
 {
    Int i, j, k, nbytes, qq;
    unsigned char* chp;
-   Arena* a = &arena[VG_AR_PRIVATE];
+   Arena* a = &arena[VG_AR_CORE];
    srandom(1);
    for (i = 0; i < N_TEST_ARR; i++)
       test_arr[i] = NULL;
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index eea79cb..5ea4246 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Maintain bitmaps of memory, tracking the accessibility (A)   ---*/
-/*--- and validity (V) status of each byte.                        ---*/
+/*--- Memory-related stuff: segment initialisation and tracking,   ---*/
+/*--- stack operations                                             ---*/
 /*---                                                  vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -27,1275 +27,208 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-/* Define to debug the mem audit system. */
-/* #define VG_DEBUG_MEMORY */
 
-/* Define to debug the memory-leak-detector. */
-/* #define VG_DEBUG_LEAKCHECK */
+/*--------------------------------------------------------------*/
+/*--- Initialise program data/text etc on program startup.   ---*/
+/*--------------------------------------------------------------*/
 
-/* Define to collect detailed performance info. */
-/* #define VG_PROFILE_MEMORY */
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level support for memory checking.               ---*/
-/*------------------------------------------------------------*/
-
-/* 
-   All reads and writes are checked against a memory map, which
-   records the state of all memory in the process.  The memory map is
-   organised like this:
-
-   The top 16 bits of an address are used to index into a top-level
-   map table, containing 65536 entries.  Each entry is a pointer to a
-   second-level map, which records the accesibililty and validity
-   permissions for the 65536 bytes indexed by the lower 16 bits of the
-   address.  Each byte is represented by nine bits, one indicating
-   accessibility, the other eight validity.  So each second-level map
-   contains 73728 bytes.  This two-level arrangement conveniently
-   divides the 4G address space into 64k lumps, each size 64k bytes.
-
-   All entries in the primary (top-level) map must point to a valid
-   secondary (second-level) map.  Since most of the 4G of address
-   space will not be in use -- ie, not mapped at all -- there is a
-   distinguished secondary map, which indicates `not addressible and
-   not valid' writeable for all bytes.  Entries in the primary map for
-   which the entire 64k is not in use at all point at this
-   distinguished map.
-
-   [...] lots of stuff deleted due to out of date-ness
-
-   As a final optimisation, the alignment and address checks for
-   4-byte loads and stores are combined in a neat way.  The primary
-   map is extended to have 262144 entries (2^18), rather than 2^16.
-   The top 3/4 of these entries are permanently set to the
-   distinguished secondary map.  For a 4-byte load/store, the
-   top-level map is indexed not with (addr >> 16) but instead f(addr),
-   where
-
-    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
-        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
-        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
-
-   ie the lowest two bits are placed above the 16 high address bits.
-   If either of these two bits are nonzero, the address is misaligned;
-   this will select a secondary map from the upper 3/4 of the primary
-   map.  Because this is always the distinguished secondary map, a
-   (bogus) address check failure will result.  The failure handling
-   code can then figure out whether this is a genuine addr check
-   failure or whether it is a possibly-legitimate access at a
-   misaligned address.  
-*/
-
-
-/*------------------------------------------------------------*/
-/*--- Crude profiling machinery.                           ---*/
-/*------------------------------------------------------------*/
-
-#ifdef VG_PROFILE_MEMORY
-
-#define N_PROF_EVENTS 150
-
-static UInt event_ctr[N_PROF_EVENTS];
-
-static void init_prof_mem ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++)
-      event_ctr[i] = 0;
-}
-
-void VG_(done_prof_mem) ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++) {
-      if ((i % 10) == 0) 
-         VG_(printf)("\n");
-      if (event_ctr[i] > 0)
-         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+typedef
+   struct _ExeSeg {
+      Addr start;
+      UInt size;
+      struct _ExeSeg* next;
    }
-   VG_(printf)("\n");
-}
+   ExeSeg;
 
-#define PROF_EVENT(ev)                                  \
-   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
-        event_ctr[ev]++;                                \
-   } while (False);
+/* The list of current executable segments loaded.  Required so that when a
+   segment is munmap'd, if it's executable we can recognise it as such and
+   invalidate translations for it, and drop any basic-block specific
+   information being stored.  If symbols are being used, this list will have
+   the same segments recorded in it as the SegInfo symbols list (but much
+   less information about each segment).
+*/
+static ExeSeg* exeSegsHead = NULL;
 
-#else
-
-static void init_prof_mem ( void ) { }
-       void VG_(done_prof_mem) ( void ) { }
-
-#define PROF_EVENT(ev) /* */
-
-#endif
-
-/* Event index.  If just the name of the fn is given, this means the
-   number of calls to the fn.  Otherwise it is the specified event.
-
-   10   alloc_secondary_map
-
-   20   get_abit
-   21   get_vbyte
-   22   set_abit
-   23   set_vbyte
-   24   get_abits4_ALIGNED
-   25   get_vbytes4_ALIGNED
-
-   30   set_address_range_perms
-   31   set_address_range_perms(lower byte loop)
-   32   set_address_range_perms(quadword loop)
-   33   set_address_range_perms(upper byte loop)
+/* Prepend it -- mmaps/munmaps likely to follow a stack pattern(?) so this
+   is good.
+   Also check no segments overlap, which would be very bad.  Check is linear
+   for each seg added (quadratic overall) but the total number should be
+   small (konqueror has around 50 --njn). */
+static void add_exe_segment_to_list( a, len ) 
+{
+   Addr lo = a;
+   Addr hi = a + len - 1;
+   ExeSeg* es;
+   ExeSeg* es2;
    
-   35   make_noaccess
-   36   make_writable
-   37   make_readable
+   /* Prepend it */
+   es        = (ExeSeg*)VG_(arena_malloc)(VG_AR_CORE, sizeof(ExeSeg));
+   es->start = a;
+   es->size  = len;
+   es->next  = exeSegsHead;
+   exeSegsHead = es;
 
-   40   copy_address_range_perms
-   41   copy_address_range_perms(byte loop)
-   42   check_writable
-   43   check_writable(byte loop)
-   44   check_readable
-   45   check_readable(byte loop)
-   46   check_readable_asciiz
-   47   check_readable_asciiz(byte loop)
-
-   50   make_aligned_word_NOACCESS
-   51   make_aligned_word_WRITABLE
-
-   60   helperc_LOADV4
-   61   helperc_STOREV4
-   62   helperc_LOADV2
-   63   helperc_STOREV2
-   64   helperc_LOADV1
-   65   helperc_STOREV1
-
-   70   rim_rd_V4_SLOWLY
-   71   rim_wr_V4_SLOWLY
-   72   rim_rd_V2_SLOWLY
-   73   rim_wr_V2_SLOWLY
-   74   rim_rd_V1_SLOWLY
-   75   rim_wr_V1_SLOWLY
-
-   80   fpu_read
-   81   fpu_read aligned 4
-   82   fpu_read aligned 8
-   83   fpu_read 2
-   84   fpu_read 10
-
-   85   fpu_write
-   86   fpu_write aligned 4
-   87   fpu_write aligned 8
-   88   fpu_write 2
-   89   fpu_write 10
-
-   90   fpu_read_check_SLOWLY
-   91   fpu_read_check_SLOWLY(byte loop)
-   92   fpu_write_check_SLOWLY
-   93   fpu_write_check_SLOWLY(byte loop)
-
-   100  is_plausible_stack_addr
-   101  handle_esp_assignment
-   102  handle_esp_assignment(-4)
-   103  handle_esp_assignment(+4)
-   104  handle_esp_assignment(-12)
-   105  handle_esp_assignment(-8)
-   106  handle_esp_assignment(+16)
-   107  handle_esp_assignment(+12)
-   108  handle_esp_assignment(0)
-   109  handle_esp_assignment(+8)
-   110  handle_esp_assignment(-16)
-   111  handle_esp_assignment(+20)
-   112  handle_esp_assignment(-20)
-   113  handle_esp_assignment(+24)
-   114  handle_esp_assignment(-24)
-
-   120  vg_handle_esp_assignment_SLOWLY
-   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
-   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
-   123  vg_handle_esp_assignment_SLOWLY(normal)
-   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
-*/
-
-/*------------------------------------------------------------*/
-/*--- Function declarations.                               ---*/
-/*------------------------------------------------------------*/
-
-/* Set permissions for an address range.  Not speed-critical. */
-void VGM_(make_noaccess) ( Addr a, UInt len );
-void VGM_(make_writable) ( Addr a, UInt len );
-void VGM_(make_readable) ( Addr a, UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a );
-static UInt vgm_rd_V2_SLOWLY ( Addr a );
-static UInt vgm_rd_V1_SLOWLY ( Addr a );
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes );
-static void fpu_read_check_SLOWLY ( Addr addr, Int size );
-static void fpu_write_check_SLOWLY ( Addr addr, Int size );
-
-
-/*------------------------------------------------------------*/
-/*--- Data defns.                                          ---*/
-/*------------------------------------------------------------*/
-
-typedef 
-   struct {
-      UChar abits[8192];
-      UChar vbyte[65536];
-   }
-   SecMap;
-
-/* These two are statically allocated.  Should they be non-public? */
-SecMap* VG_(primary_map)[ /*65536*/ 262144 ];
-static SecMap  vg_distinguished_secondary_map;
-
-#define IS_DISTINGUISHED_SM(smap) \
-   ((smap) == &vg_distinguished_secondary_map)
-
-#define ENSURE_MAPPABLE(addr,caller)                                   \
-   do {                                                                \
-      if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) {       \
-         VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \
-         /* VG_(printf)("new 2map because of %p\n", addr);   */       \
-      }                                                                \
-   } while(0)
-
-#define BITARR_SET(aaa_p,iii_p)                         \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] |= (1 << (iii & 7));                \
-   } while (0)
-
-#define BITARR_CLEAR(aaa_p,iii_p)                       \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
-   } while (0)
-
-#define BITARR_TEST(aaa_p,iii_p)                        \
-      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
-               & (1 << (((UInt)iii_p) & 7))))           \
-
-
-#define VGM_BIT_VALID      0
-#define VGM_BIT_INVALID    1
-
-#define VGM_NIBBLE_VALID   0
-#define VGM_NIBBLE_INVALID 0xF
-
-#define VGM_BYTE_VALID     0
-#define VGM_BYTE_INVALID   0xFF
-
-/* Now in vg_include.h.
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-*/
-
-#define VGM_EFLAGS_VALID   0xFFFFFFFE
-#define VGM_EFLAGS_INVALID 0xFFFFFFFF
-
-
-#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
-
-
-/*------------------------------------------------------------*/
-/*--- Basic bitmap management, reading and writing.        ---*/
-/*------------------------------------------------------------*/
-
-/* Allocate and initialise a secondary map. */
-
-static SecMap* alloc_secondary_map ( __attribute__ ((unused)) 
-                                     Char* caller )
-{
-   SecMap* map;
-   UInt  i;
-   PROF_EVENT(10);
-
-   /* Mark all bytes as invalid access and invalid value. */
-
-   /* It just happens that a SecMap occupies exactly 18 pages --
-      although this isn't important, so the following assert is
-      spurious. */
-   vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
-
-   for (i = 0; i < 8192; i++)
-      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
-   return map;
-}
-
-
-/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
-
-static __inline__ UChar get_abit ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(20);
-   return BITARR_TEST(sm->abits, sm_off) 
-             ? VGM_BIT_INVALID : VGM_BIT_VALID;
-}
-
-static __inline__ UChar get_vbyte ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(21);
-   return sm->vbyte[sm_off];
-}
-
-static __inline__ void set_abit ( Addr a, UChar abit )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(22);
-   ENSURE_MAPPABLE(a, "set_abit");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   if (abit) 
-      BITARR_SET(sm->abits, sm_off);
-   else
-      BITARR_CLEAR(sm->abits, sm_off);
-}
-
-static __inline__ void set_vbyte ( Addr a, UChar vbyte )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(23);
-   ENSURE_MAPPABLE(a, "set_vbyte");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   sm->vbyte[sm_off] = vbyte;
-}
-
-
-/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
-
-static __inline__ UChar get_abits4_ALIGNED ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   abits8;
-   PROF_EVENT(24);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   abits8 = sm->abits[sm_off >> 3];
-   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   abits8 &= 0x0F;
-   return abits8;
-}
-
-static UInt __inline__ get_vbytes4_ALIGNED ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(25);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   return ((UInt*)(sm->vbyte))[sm_off >> 2];
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Setting permissions over address ranges.             ---*/
-/*------------------------------------------------------------*/
-
-static void set_address_range_perms ( Addr a, UInt len, 
-                                      UInt example_a_bit,
-                                      UInt example_v_bit )
-{
-   UChar   vbyte, abyte8;
-   UInt    vword4, sm_off;
-   SecMap* sm;
-
-   PROF_EVENT(30);
-
-   if (len == 0)
-      return;
-
-   if (len > 100 * 1000 * 1000) 
-      VG_(message)(Vg_UserMsg, 
-                   "Warning: set address range perms: "
-                   "large range %d, a %d, v %d",
-                   len, example_a_bit, example_v_bit );
-
-   VGP_PUSHCC(VgpSARP);
-
-   /* Requests to change permissions of huge address ranges may
-      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
-      far all legitimate requests have fallen beneath that size. */
-   /* 4 Mar 02: this is just stupid; get rid of it. */
-   /* vg_assert(len < 30000000); */
-
-   /* Check the permissions make sense. */
-   vg_assert(example_a_bit == VGM_BIT_VALID 
-             || example_a_bit == VGM_BIT_INVALID);
-   vg_assert(example_v_bit == VGM_BIT_VALID 
-             || example_v_bit == VGM_BIT_INVALID);
-   if (example_a_bit == VGM_BIT_INVALID)
-      vg_assert(example_v_bit == VGM_BIT_INVALID);
-
-   /* The validity bits to write. */
-   vbyte = example_v_bit==VGM_BIT_VALID 
-              ? VGM_BYTE_VALID : VGM_BYTE_INVALID;
-
-   /* In order that we can charge through the address space at 8
-      bytes/main-loop iteration, make up some perms. */
-   abyte8 = (example_a_bit << 7)
-            | (example_a_bit << 6)
-            | (example_a_bit << 5)
-            | (example_a_bit << 4)
-            | (example_a_bit << 3)
-            | (example_a_bit << 2)
-            | (example_a_bit << 1)
-            | (example_a_bit << 0);
-   vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte;
-
-#  ifdef VG_DEBUG_MEMORY
-   /* Do it ... */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }
-
-#  else
-   /* Slowly do parts preceding 8-byte alignment. */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      if ((a % 8) == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0);
-
-   /* Once aligned, go fast. */
-   while (True) {
-      PROF_EVENT(32);
-      if (len < 8) break;
-      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
-      sm = VG_(primary_map)[a >> 16];
-      sm_off = a & 0xFFFF;
-      sm->abits[sm_off >> 3] = abyte8;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4;
-      a += 8;
-      len -= 8;
-   }
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0 && len < 8);
-
-   /* Finish the upper fragment. */
-   while (True) {
-      PROF_EVENT(33);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-#  endif
-
-   /* Check that zero page and highest page have not been written to
-      -- this could happen with buggy syscall wrappers.  Today
-      (2001-04-26) had precisely such a problem with
-      __NR_setitimer. */
-   vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   VGP_POPCC;
-}
-
-
-/* Set permissions for address ranges ... */
-
-void VGM_(make_noaccess) ( Addr a, UInt len )
-{
-   PROF_EVENT(35);
-   set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_writable) ( Addr a, UInt len )
-{
-   PROF_EVENT(36);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_readable) ( Addr a, UInt len )
-{
-   PROF_EVENT(37);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-void VGM_(make_readwritable) ( Addr a, UInt len )
-{
-   PROF_EVENT(38);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-/* Block-copy permissions (needed for implementing realloc()). */
-
-void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len )
-{
-   UInt i;
-   PROF_EVENT(40);
-   for (i = 0; i < len; i++) {
-      UChar abit  = get_abit ( src+i );
-      UChar vbyte = get_vbyte ( src+i );
-      PROF_EVENT(41);
-      set_abit ( dst+i, abit );
-      set_vbyte ( dst+i, vbyte );
-   }
-}
-
-
-/* Check permissions for address range.  If inadequate permissions
-   exist, *bad_addr is set to the offending address, so the caller can
-   know what it is. */
-
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
-{
-   UInt  i;
-   UChar abit;
-   PROF_EVENT(42);
-   for (i = 0; i < len; i++) {
-      PROF_EVENT(43);
-      abit = get_abit(a);
-      if (abit == VGM_BIT_INVALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
+   /* Check there's no overlap with the rest of the list */
+   for (es2 = es->next; es2 != NULL; es2 = es2->next) {
+      Addr lo2 = es2->start;
+      Addr hi2 = es2->start + es2->size - 1;
+      Bool overlap;
+      vg_assert(lo < hi);
+      vg_assert(lo2 < hi2);
+      /* the main assertion */
+      overlap = (lo <= lo2 && lo2 <= hi)
+                 || (lo <= hi2 && hi2 <= hi);
+      if (overlap) {
+         VG_(printf)("\n\nOVERLAPPING EXE SEGMENTS\n"
+                     "  new: start %p, size %d\n"
+                     "  old: start %p, size %d\n\n",
+                     es->start, es->size, es2->start, es2->size );
+         vg_assert(! overlap);
       }
-      a++;
    }
+}
+
+static Bool remove_if_exe_segment_from_list( Addr a, UInt len )
+{
+   ExeSeg **prev_next_ptr = & exeSegsHead, /* addr of the link pointing at 'curr' */
+          *curr = exeSegsHead;
+
+   while (True) {
+      if (curr == NULL) break;       /* ran off the end: not found */
+      if (a == curr->start) break;   /* match on start addr only; 'len' is ignored */
+      prev_next_ptr = &curr->next;
+      curr = curr->next;
+   }
+   if (curr == NULL)
+      return False;                  /* no exe segment starts at 'a' */
+
+   vg_assert(*prev_next_ptr == curr);
+
+   *prev_next_ptr = curr->next;      /* unlink 'curr' from the list */
+
+   VG_(arena_free)(VG_AR_CORE, curr);
    return True;
 }
 
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+/* Records the exe segment in the ExeSeg list (checking for overlaps), and
+   reads debug info if required.  Note the entire /proc/pid/maps file is 
+   read for the debug info, but it just reads symbols for newly added exe
+   segments.  This is required to find out their names if they have one.  So
+   we don't use this at startup because it's overkill and can screw reading
+   of /proc/pid/maps.
+ */
+void VG_(new_exe_segment) ( Addr a, UInt len )
 {
-   UInt  i;
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(44);
-   for (i = 0; i < len; i++) {
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      PROF_EVENT(45);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      a++;
-   }
-   return True;
+   // SSS: only bother if size != 0?  Does that happen? (probably can)
+
+   add_exe_segment_to_list( a, len );
+   VG_(maybe_read_symbols)();
 }
 
-
-/* Check a zero-terminated ascii string.  Tricky -- don't want to
-   examine the actual bytes, to find the end, until we're sure it is
-   safe to do so. */
-
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+/* Invalidate translations as necessary (also discarding any basic
+   block-specific info retained by the skin) and unload any debug
+   symbols. */
+// Nb: remove_if_exe_segment_from_list() and VG_(maybe_unload_symbols)()
+// both ignore 'len', but that seems to be ok for most programs...  see
+// comment above vg_syscalls.c:mmap_segment() et al for more details.
+void VG_(remove_if_exe_segment) ( Addr a, UInt len )
 {
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(46);
-   while (True) {
-      PROF_EVENT(47);
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      /* Ok, a is safe to read. */
-      if (* ((UChar*)a) == 0) return True;
-      a++;
+   if (remove_if_exe_segment_from_list( a, len )) {
+      VG_(invalidate_translations) ( a, len );
+      VG_(maybe_unload_symbols)    ( a, len );
    }
 }
 
 
-/* Setting permissions for aligned words.  This supports fast stack
-   operations. */
-
-static __inline__ void make_aligned_word_NOACCESS ( Addr a )
+static
+void startup_segment_callback ( Addr start, UInt size, 
+                                Char rr, Char ww, Char xx, 
+                                UInt foffset, UChar* filename )
 {
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(50);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (1s). */
-   sm->abits[sm_off >> 3] |= mask;
-}
+   UInt r_esp;
+   Bool is_stack_segment;
 
-static __inline__ void make_aligned_word_WRITABLE ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(51);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (0s). */
-   sm->abits[sm_off >> 3] &= ~mask;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Functions called directly from generated code.       ---*/
-/*------------------------------------------------------------*/
-
-static __inline__ UInt rotateRight16 ( UInt x )
-{
-   /* Amazingly, gcc turns this into a single rotate insn. */
-   return (x >> 16) | (x << 16);
-}
-
-
-static __inline__ UInt shiftRight16 ( UInt x )
-{
-   return x >> 16;
-}
-
-
-/* Read/write 1/2/4 sized V bytes, and emit an address error if
-   needed. */
-
-/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast.
-   Under all other circumstances, it defers to the relevant _SLOWLY
-   function, which can handle all situations.
-*/
-UInt VG_(helperc_LOADV4) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V4_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(60);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V4_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV4) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V4_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(61);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V4_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV2) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V2_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(62);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFF0000 
-             |  
-             (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V2_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV2) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V2_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(63);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V2_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV1) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V1_SLOWLY(a);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(64);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFFFF00
-             |
-             (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V1_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV1) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V1_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(65);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V1_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Fallback functions to handle cases that the above    ---*/
-/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage.           ---*/
-/*------------------------------------------------------------*/
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a )
-{
-   Bool a0ok, a1ok, a2ok, a3ok;
-   UInt vb0, vb1, vb2, vb3;
-
-   PROF_EVENT(70);
-
-   /* First establish independently the addressibility of the 4 bytes
-      involved. */
-   a0ok = get_abit(a+0) == VGM_BIT_VALID;
-   a1ok = get_abit(a+1) == VGM_BIT_VALID;
-   a2ok = get_abit(a+2) == VGM_BIT_VALID;
-   a3ok = get_abit(a+3) == VGM_BIT_VALID;
-
-   /* Also get the validity bytes for the address. */
-   vb0 = (UInt)get_vbyte(a+0);
-   vb1 = (UInt)get_vbyte(a+1);
-   vb2 = (UInt)get_vbyte(a+2);
-   vb3 = (UInt)get_vbyte(a+3);
-
-   /* Now distinguish 3 cases */
-
-   /* Case 1: the address is completely valid, so:
-      - no addressing error
-      - return V bytes as read from memory
-   */
-   if (a0ok && a1ok && a2ok && a3ok) {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= vb3;
-      vw <<= 8; vw |= vb2;
-      vw <<= 8; vw |= vb1;
-      vw <<= 8; vw |= vb0;
-      return vw;
-   }
-
-   /* Case 2: the address is completely invalid.  
-      - emit addressing error
-      - return V word indicating validity.  
-      This sounds strange, but if we make loads from invalid addresses 
-      give invalid data, we also risk producing a number of confusing
-      undefined-value errors later, which confuses the fact that the
-      error arose in the first place from an invalid address. 
-   */
-   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
-   if (!VG_(clo_partial_loads_ok) 
-       || ((a & 3) != 0)
-       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
-      VG_(record_address_error)( a, 4, False );
-      return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) 
-             | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID;
-   }
-
-   /* Case 3: the address is partially valid.  
-      - no addressing error
-      - returned V word is invalid where the address is invalid, 
-        and contains V bytes from memory otherwise. 
-      Case 3 is only allowed if VG_(clo_partial_loads_ok) is True
-      (which is the default), and the address is 4-aligned.  
-      If not, Case 2 will have applied.
-   */
-   vg_assert(VG_(clo_partial_loads_ok));
-   {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID);
-      return vw;
-   }
-}
-
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(71);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+2) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+3) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+3, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 4, True );
-}
-
-static UInt vgm_rd_V2_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(72);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V bytes, remembering to do it little-endian-ly. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+1);
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 2, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(73);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 2, True );
-}
-
-static UInt vgm_rd_V1_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(74);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V byte. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 1, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(75);
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 1, True );
-}
-
-
-/* ---------------------------------------------------------------------
-   Called from generated code, or from the assembly helpers.
-   Handlers for value check failures.
-   ------------------------------------------------------------------ */
-
-void VG_(helperc_value_check0_fail) ( void )
-{
-   VG_(record_value_error) ( 0 );
-}
-
-void VG_(helperc_value_check1_fail) ( void )
-{
-   VG_(record_value_error) ( 1 );
-}
-
-void VG_(helperc_value_check2_fail) ( void )
-{
-   VG_(record_value_error) ( 2 );
-}
-
-void VG_(helperc_value_check4_fail) ( void )
-{
-   VG_(record_value_error) ( 4 );
-}
-
-
-/* ---------------------------------------------------------------------
-   FPU load and store checks, called from generated code.
-   ------------------------------------------------------------------ */
-
-void VGM_(fpu_read_check) ( Addr addr, Int size )
-{
-   /* Ensure the read area is both addressible and valid (ie,
-      readable).  If there's an address error, don't report a value
-      error too; but if there isn't an address error, check for a
-      value error. 
-
-      Try to be reasonably fast on the common case; wimp out and defer
-      to fpu_read_check_SLOWLY for everything else.  */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(80);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_read_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(81);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow4;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow4:
-      fpu_read_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(82);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Both halves properly aligned, addressible and with valid
-         data. */
-      return;
-     slow8:
-      fpu_read_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(83);
-      fpu_read_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(84);
-      fpu_read_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(84); /* XXX assign correct event number */
-      fpu_read_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_read_check: unhandled size");
-#  endif
-}
-
-
-void VGM_(fpu_write_check) ( Addr addr, Int size )
-{
-   /* Ensure the written area is addressible, and moan if otherwise.
-      If it is addressible, make it valid, otherwise invalid. 
-   */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(85);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_write_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(86);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      return;
-     slow4:
-      fpu_write_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(87);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow8:
-      fpu_write_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(88);
-      fpu_write_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(89);
-      fpu_write_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(89); /* XXX assign correct event number */
-      fpu_write_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_write_check: unhandled size");
-#  endif
-}
-
-
-/* ---------------------------------------------------------------------
-   Slow, general cases for FPU load and store checks.
-   ------------------------------------------------------------------ */
-
-/* Generic version.  Test for both addr and value errors, but if
-   there's an addr error, don't report a value error even if it
-   exists. */
-
-void fpu_read_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Bool aerr = False;
-   Bool verr = False;
-   PROF_EVENT(90);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(91);
-      if (get_abit(addr+i) != VGM_BIT_VALID)
-         aerr = True;
-      if (get_vbyte(addr+i) != VGM_BYTE_VALID)
-         verr = True;
-   }
-
-   if (aerr) {
-      VG_(record_address_error)( addr, size, False );
-   } else {
-     if (verr)
-        VG_(record_value_error)( size );
-   }
-}
-
-
-/* Generic version.  Test for addr errors.  Valid addresses are
-   given valid values, and invalid addresses invalid values. */
-
-void fpu_write_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Addr a_here;
-   Bool a_ok;
-   Bool aerr = False;
-   PROF_EVENT(92);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(93);
-      a_here = addr+i;
-      a_ok = get_abit(a_here) == VGM_BIT_VALID;
-      if (a_ok) {
-	set_vbyte(a_here, VGM_BYTE_VALID);
-      } else {
-	set_vbyte(a_here, VGM_BYTE_INVALID);
-        aerr = True;
+   /* Sanity check ... if this is the executable's text segment,
+      ensure it is loaded where we think it ought to be.  Any file
+      name which doesn't contain ".so" is assumed to be the
+      executable. */
+   if (filename != NULL
+       && xx == 'x'
+       && VG_(strstr)(filename, ".so") == NULL
+      ) {
+      /* We assume this is the executable. */
+      if (start != VG_ASSUMED_EXE_BASE) {
+         VG_(message)(Vg_UserMsg,
+                      "FATAL: executable base addr not as assumed.");
+         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
+                      filename, start, VG_ASSUMED_EXE_BASE);
+         VG_(message)(Vg_UserMsg,
+            "One reason this could happen is that you have a shared object");
+         VG_(message)(Vg_UserMsg,
+            " whose name doesn't contain the characters \".so\", so Valgrind ");
+         VG_(message)(Vg_UserMsg,
+            "naively assumes it is the executable.  ");
+         VG_(message)(Vg_UserMsg,
+            "In that case, rename it appropriately.");
+         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
       }
    }
-   if (aerr) {
-      VG_(record_address_error)( addr, size, True );
+
+   if (0)
+      VG_(message)(Vg_DebugMsg,
+                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
+                   start,start+size,rr,ww,xx,foffset,
+                   size, filename?filename:(UChar*)"NULL");
+
+   if (rr != 'r' && xx != 'x' && ww != 'w') {
+      VG_(printf)("No permissions on the segment named %s\n", filename);
+      VG_(panic)("Non-readable, writable, executable segment at startup");
    }
+
+   /* This parallels what happens when we mmap some new memory */
+   if (filename != NULL && xx == 'x') {
+      VG_(new_exe_segment)( start, size );
+   }
+   VG_TRACK( new_mem_startup, start, size, rr=='r', ww=='w', xx=='x' );
+
+   /* If this is the stack segment mark all below %esp as noaccess. */
+   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   is_stack_segment = start <= r_esp && r_esp < start+size;
+   if (is_stack_segment) {
+      if (0)
+         VG_(message)(Vg_DebugMsg, "invalidating stack area: %x .. %x",
+                      start,r_esp);
+      VG_TRACK( die_mem_stack, start, r_esp-start );
+   }
+}
+
+
+/* 1. Records exe segments from /proc/pid/maps -- always necessary, because 
+      if they're munmap()ed we need to know if they were executable in order
+      to discard translations.  Also checks there's no exe segment overlaps.
+
+   2. Marks global variables that might be accessed from generated code;
+
+   3. Sets up the end of the data segment so that vg_syscalls.c can make
+      sense of calls to brk().
+ */
+void VG_(init_memory) ( void )
+{
+   /* 1: record exe segments from /proc/pid/maps (and check overlaps) */
+   VG_(read_procselfmaps) ( startup_segment_callback );
+
+   /* 2: mark global variables that generated code may access */
+   VG_TRACK( post_mem_write, (Addr) & VG_(running_on_simd_CPU), 1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_trace_malloc),    1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_sloppy_malloc),   1 );
+
+   /* 3: record end of the data segment, for brk() handling */
+   VG_(init_dataseg_end_for_brk)();
 }
 
 
@@ -1340,7 +273,7 @@
 Bool is_plausible_stack_addr ( ThreadState* tst, Addr aa )
 {
    UInt a = (UInt)aa;
-   PROF_EVENT(100);
+   //PROF_EVENT(100);   PPP
    if (a <= tst->stack_highest_word && 
        a > tst->stack_highest_word - VG_PLAUSIBLE_STACK_SIZE)
       return True;
@@ -1349,18 +282,6 @@
 }
 
 
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
-{
-   if ((UInt)esp > (UInt)aa
-       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
-      return True;
-   else
-      return False;
-}
-
-
 /* Kludgey ... how much does %esp have to change before we reckon that
    the application is switching stacks ? */
 #define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4)
@@ -1370,133 +291,59 @@
    return a & ~(VKI_BYTES_PER_PAGE-1);
 }
 
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp );
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr );
-
-void VGM_(handle_esp_assignment) ( Addr new_espA )
+__attribute__ ((regparm (1)))
+void VG_(handle_esp_assignment) ( Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
+   UInt old_esp;
+   Int  delta;
 
-   PROF_EVENT(101);
+   VGP_MAYBE_PUSHCC(VgpStack);
+
+   old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   delta = ((Int)new_esp) - ((Int)old_esp);
+
+   /* Update R_ESP */
+   VG_(baseBlock)[VGOFF_(m_esp)] = new_esp;
+
+   //PROF_EVENT(101);   PPP
 
 #  ifndef VG_DEBUG_MEMORY
 
-   if (IS_ALIGNED4_ADDR(old_esp)) {
+   if (IS_ALIGNED4_ADDR(old_esp) &&  IS_ALIGNED4_ADDR(new_esp)) {
 
       /* Deal with the most common cases fast.  These are ordered in
          the sequence most common first. */
 
-      if (delta == -4) {
-         /* Moving down by 4 and properly aligned.. */
-         PROF_EVENT(102);
-         make_aligned_word_WRITABLE(new_esp);
-         return;
+#     ifdef VG_PROFILE_MEMORY
+      // PPP
+      /* Must be '==' (comparison), not '=' (assignment): with '=' the
+         first test always fires and clobbers 'delta', breaking the
+         VG_TRACK calls below whenever profiling is enabled. */
+      if      (delta == - 4) PROF_EVENT(102);
+      else if (delta ==   4) PROF_EVENT(103);
+      else if (delta == -12) PROF_EVENT(104);
+      else if (delta == - 8) PROF_EVENT(105);
+      else if (delta ==  16) PROF_EVENT(106);
+      else if (delta ==  12) PROF_EVENT(107);
+      else if (delta ==   0) PROF_EVENT(108);
+      else if (delta ==   8) PROF_EVENT(109);
+      else if (delta == -16) PROF_EVENT(110);
+      else if (delta ==  20) PROF_EVENT(111);
+      else if (delta == -20) PROF_EVENT(112);
+      else if (delta ==  24) PROF_EVENT(113); // PPP: 24/-24 merged below
+      else if (delta == -24) PROF_EVENT(114);
+      else if (delta > 0)   PROF_EVENT(115); // PPP: new: aligned_big_pos
+      else                  PROF_EVENT(116); // PPP: new: aligned_big_neg
+#     endif
+      
+      if (delta < 0) {
+         VG_TRACK(new_mem_stack_aligned, new_esp, -delta);
+      } else if (delta > 0) {
+         VG_TRACK(die_mem_stack_aligned, old_esp, delta);
       }
+      /* Do nothing if (delta==0) */
 
-      if (delta == 4) {
-         /* Moving up by 4 and properly aligned. */
-         PROF_EVENT(103);
-         make_aligned_word_NOACCESS(old_esp);
-         return;
-      }
-
-      if (delta == -12) {
-         PROF_EVENT(104);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         return;
-      }
-
-      if (delta == -8) {
-         PROF_EVENT(105);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         return;
-      }
-
-      if (delta == 16) {
-         PROF_EVENT(106);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         return;
-      }
-
-      if (delta == 12) {
-         PROF_EVENT(107);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         return;
-      }
-
-      if (delta == 0) {
-         PROF_EVENT(108);
-         return;
-      }
-
-      if (delta == 8) {
-         PROF_EVENT(109);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         return;
-      }
-
-      if (delta == -16) {
-         PROF_EVENT(110);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         return;
-      }
-
-      if (delta == 20) {
-         PROF_EVENT(111);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         return;
-      }
-
-      if (delta == -20) {
-         PROF_EVENT(112);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         return;
-      }
-
-      if (delta == 24) {
-         PROF_EVENT(113);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         make_aligned_word_NOACCESS(old_esp+20);
-         return;
-      }
-
-      if (delta == -24) {
-         PROF_EVENT(114);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         make_aligned_word_WRITABLE(new_esp+20);
-         return;
-      }
-
+      VGP_MAYBE_POPCC(VgpStack);
+      return;
    }
 
 #  endif
@@ -1504,33 +351,35 @@
    /* The above special cases handle 90% to 95% of all the stack
       adjustments.  The rest we give to the slow-but-general
       mechanism. */
-   vg_handle_esp_assignment_SLOWLY ( new_espA );
+   vg_handle_esp_assignment_SLOWLY ( old_esp, new_esp );
+   VGP_MAYBE_POPCC(VgpStack);
 }
 
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA )
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
-   //   VG_(printf)("%d ", delta);
-   PROF_EVENT(120);
+   Int  delta;
+   
+   delta = ((Int)new_esp) - ((Int)old_esp);
+   //VG_(printf)("delta %d (%x) %x --> %x\n", delta, delta, old_esp, new_esp);
+   //PROF_EVENT(120);   PPP
    if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) {
       /* "Ordinary" stack change. */
       if (new_esp < old_esp) {
          /* Moving down; the stack is growing. */
-         PROF_EVENT(121);
-         VGM_(make_writable) ( new_esp, old_esp - new_esp );
-         return;
-      }
-      if (new_esp > old_esp) {
+         //PROF_EVENT(121); PPP
+         VG_TRACK( new_mem_stack, new_esp, -delta );
+      
+      } else if (new_esp > old_esp) {
          /* Moving up; the stack is shrinking. */
-         PROF_EVENT(122);
-         VGM_(make_noaccess) ( old_esp, new_esp - old_esp );
-         return;
+         //PROF_EVENT(122); PPP
+         VG_TRACK( die_mem_stack, old_esp, delta );
+
+      } else {
+         /* when old_esp == new_esp */
+         //PROF_EVENT(123);    PPP
       }
-      PROF_EVENT(123);
-      return; /* when old_esp == new_esp */
+      return;
    }
 
    /* %esp has changed by more than HUGE_DELTA.  We take this to mean
@@ -1552,863 +401,21 @@
      Addr valid_up_to     = get_page_base(new_esp) + VKI_BYTES_PER_PAGE
                             + 0 * VKI_BYTES_PER_PAGE;
      ThreadState* tst     = VG_(get_current_thread_state)();
-     PROF_EVENT(124);
+     //PROF_EVENT(124); PPP
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_UserMsg, "Warning: client switching stacks?  "
-                                 "%%esp: %p --> %p",
-                                  old_esp, new_esp);
+                                 "%%esp: %p --> %p", old_esp, new_esp);
      /* VG_(printf)("na %p,   %%esp %p,   wr %p\n",
                     invalid_down_to, new_esp, valid_up_to ); */
-     VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to );
+     VG_TRACK( die_mem_stack, invalid_down_to, new_esp - invalid_down_to );
      if (!is_plausible_stack_addr(tst, new_esp)) {
-        VGM_(make_readable) ( new_esp, valid_up_to - new_esp );
+        VG_TRACK( post_mem_write, new_esp, valid_up_to - new_esp );
      }
    }
 }
 
 
-/*--------------------------------------------------------------*/
-/*--- Initialise the memory audit system on program startup. ---*/
-/*--------------------------------------------------------------*/
-
-/* Handle one entry derived from /proc/self/maps. */
-
-static
-void init_memory_audit_callback ( 
-        Addr start, UInt size, 
-        Char rr, Char ww, Char xx, 
-        UInt foffset, UChar* filename )
-{
-   UChar example_a_bit;
-   UChar example_v_bit;
-   UInt  r_esp;
-   Bool  is_stack_segment;
-
-   /* Sanity check ... if this is the executable's text segment,
-      ensure it is loaded where we think it ought to be.  Any file
-      name which doesn't contain ".so" is assumed to be the
-      executable. */
-   if (filename != NULL
-       && xx == 'x'
-       && VG_(strstr(filename, ".so")) == NULL
-      ) {
-      /* We assume this is the executable. */
-      if (start != VG_ASSUMED_EXE_BASE) {
-         VG_(message)(Vg_UserMsg,
-                      "FATAL: executable base addr not as assumed.");
-         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
-                      filename, start, VG_ASSUMED_EXE_BASE);
-         VG_(message)(Vg_UserMsg,
-            "One reason this could happen is that you have a shared object");
-         VG_(message)(Vg_UserMsg,
-            " whose name doesn't contain the characters \".so\", so Valgrind ");
-         VG_(message)(Vg_UserMsg,
-            "naively assumes it is the executable.  ");
-         VG_(message)(Vg_UserMsg,
-            "In that case, rename it appropriately.");
-         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
-      }
-   }
-    
-   if (0)
-      VG_(message)(Vg_DebugMsg, 
-                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
-                   start,start+size,rr,ww,xx,foffset,
-                   size, filename?filename:(UChar*)"NULL");
-
-   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   is_stack_segment = start <= r_esp && r_esp < start+size;
-
-   /* Figure out the segment's permissions.
-
-      All segments are addressible -- since a process can read its
-      own text segment.
-
-      A read-but-not-write segment presumably contains initialised
-      data, so is all valid.  Read-write segments presumably contains
-      uninitialised data, so is all invalid.  */
-
-   /* ToDo: make this less bogus. */
-   if (rr != 'r' && xx != 'x' && ww != 'w') {
-      /* Very bogus; this path never gets taken. */
-      /* A no, V no */
-      example_a_bit = VGM_BIT_INVALID;
-      example_v_bit = VGM_BIT_INVALID;
-   } else {
-      /* A yes, V yes */
-      example_a_bit = VGM_BIT_VALID;
-      example_v_bit = VGM_BIT_VALID;
-      /* Causes a lot of errs for unknown reasons. 
-         if (filename is valgrind.so 
-               [careful about end conditions on filename]) {
-            example_a_bit = VGM_BIT_INVALID;
-            example_v_bit = VGM_BIT_INVALID;
-         }
-      */
-   }
-
-   set_address_range_perms ( start, size, 
-                             example_a_bit, example_v_bit );
-
-   if (is_stack_segment) {
-      /* This is the stack segment.  Mark all below %esp as
-         noaccess. */
-      if (0)
-         VG_(message)(Vg_DebugMsg, 
-                      "invalidating stack area: %x .. %x",
-                      start,r_esp);
-      VGM_(make_noaccess)( start, r_esp-start );
-   }
-}
-
-
-/* Initialise the memory audit system. */
-void VGM_(init_memory_audit) ( void )
-{
-   Int i;
-
-   init_prof_mem();
-
-   for (i = 0; i < 8192; i++)
-      vg_distinguished_secondary_map.abits[i] 
-         = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      vg_distinguished_secondary_map.vbyte[i] 
-         = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* These entries gradually get overwritten as the used address
-      space expands. */
-   for (i = 0; i < 65536; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-   /* These ones should never change; it's a bug in Valgrind if they
-      do. */
-   for (i = 65536; i < 262144; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-
-   /* Read the initial memory mapping from the /proc filesystem, and
-      set up our own maps accordingly. */
-   VG_(read_procselfmaps) ( init_memory_audit_callback );
-
-   /* Last but not least, set up the shadow regs with reasonable (sic)
-      values.  All regs are claimed to have valid values.
-   */
-   VG_(baseBlock)[VGOFF_(sh_esp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eax)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ecx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_esi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID;
-
-   /* Record the end of the data segment, so that vg_syscall_mem.c
-      can make sense of calls to brk(). 
-   */
-   VGM_(curr_dataseg_end) = (Addr)VG_(brk)(0);
-   if (VGM_(curr_dataseg_end) == (Addr)(-1))
-      VG_(panic)("vgm_init_memory_audit: can't determine data-seg end");
-
-   if (0)
-      VG_(printf)("DS END is %p\n", (void*)VGM_(curr_dataseg_end));
-
-   /* Read the list of errors to suppress.  This should be found in
-      the file specified by vg_clo_suppressions. */
-   VG_(load_suppressions)();
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level address-space scanning, for the leak       ---*/
-/*--- detector.                                            ---*/
-/*------------------------------------------------------------*/
-
-static 
-jmp_buf memscan_jmpbuf;
-
-static
-void vg_scan_all_valid_memory_sighandler ( Int sigNo )
-{
-   __builtin_longjmp(memscan_jmpbuf, 1);
-}
-
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
-{
-   /* All volatile, because some gccs seem paranoid about longjmp(). */
-   volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified;
-   volatile Addr pageBase, addr;
-   volatile SecMap* sm;
-   volatile UChar abits;
-   volatile UInt page_first_word;
-
-   vki_ksigaction sigbus_saved;
-   vki_ksigaction sigbus_new;
-   vki_ksigaction sigsegv_saved;
-   vki_ksigaction sigsegv_new;
-   vki_ksigset_t  blockmask_saved;
-   vki_ksigset_t  unblockmask_new;
-
-   /* Temporarily install a new sigsegv and sigbus handler, and make
-      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
-      first two can never be blocked anyway?)  */
-
-   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigbus_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
-   vg_assert(res == 0);
-
-   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigsegv_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
-   vg_assert(res == 0+0);
-
-   res =  VG_(ksigemptyset)( &unblockmask_new );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
-   vg_assert(res == 0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
-   vg_assert(res == 0+0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
-   vg_assert(res == 0+0+0+0+0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
-   vg_assert(res == 0+0+0+0+0+0);
-
-   /* The signal handlers are installed.  Actually do the memory scan. */
-   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
-   vg_assert(numPages == 1048576);
-   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
-
-   nWordsNotified = 0;
-
-   for (page = 0; page < numPages; page++) {
-      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
-      primaryMapNo = pageBase >> 16;
-      sm = VG_(primary_map)[primaryMapNo];
-      if (IS_DISTINGUISHED_SM(sm)) continue;
-      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
-         /* try this ... */
-         page_first_word = * (volatile UInt*)pageBase;
-         /* we get here if we didn't get a fault */
-         /* Scan the page */
-         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
-            abits  = get_abits4_ALIGNED(addr);
-            vbytes = get_vbytes4_ALIGNED(addr);
-            if (abits == VGM_NIBBLE_VALID 
-                && vbytes == VGM_WORD_VALID) {
-               nWordsNotified++;
-               notify_word ( addr, *(UInt*)addr );
-	    }
-         }
-      } else {
-         /* We get here if reading the first word of the page caused a
-            fault, which in turn caused the signal handler to longjmp.
-            Ignore this page. */
-         if (0)
-         VG_(printf)(
-            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
-            (void*)pageBase 
-         );
-      }
-   }
-
-   /* Restore signal state to whatever it was before. */
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
-   vg_assert(res == 0 +0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
-   vg_assert(res == 0 +0 +0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
-   vg_assert(res == 0 +0 +0 +0);
-
-   return nWordsNotified;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
-/*------------------------------------------------------------*/
-
-/* A block is either 
-   -- Proper-ly reached; a pointer to its start has been found
-   -- Interior-ly reached; only an interior pointer to it has been found
-   -- Unreached; so far, no pointers to any part of it have been found. 
-*/
-typedef 
-   enum { Unreached, Interior, Proper } 
-   Reachedness;
-
-/* A block record, used for generating err msgs. */
-typedef
-   struct _LossRecord {
-      struct _LossRecord* next;
-      /* Where these lost blocks were allocated. */
-      ExeContext*  allocated_at;
-      /* Their reachability. */
-      Reachedness  loss_mode;
-      /* Number of blocks and total # bytes involved. */
-      UInt         total_bytes;
-      UInt         num_blocks;
-   }
-   LossRecord;
-
-
-/* Find the i such that ptr points at or inside the block described by
-   shadows[i].  Return -1 if none found.  This assumes that shadows[]
-   has been sorted on the ->data field. */
-
-#ifdef VG_DEBUG_LEAKCHECK
-/* Used to sanity-check the fast binary-search mechanism. */
-static Int find_shadow_for_OLD ( Addr          ptr, 
-                                 ShadowChunk** shadows,
-                                 Int           n_shadows )
-
-{
-   Int  i;
-   Addr a_lo, a_hi;
-   PROF_EVENT(70);
-   for (i = 0; i < n_shadows; i++) {
-      PROF_EVENT(71);
-      a_lo = shadows[i]->data;
-      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
-      if (a_lo <= ptr && ptr <= a_hi)
-         return i;
-   }
-   return -1;
-}
-#endif
-
-
-static Int find_shadow_for ( Addr          ptr, 
-                             ShadowChunk** shadows,
-                             Int           n_shadows )
-{
-   Addr a_mid_lo, a_mid_hi;
-   Int lo, mid, hi, retVal;
-   PROF_EVENT(70);
-   /* VG_(printf)("find shadow for %p = ", ptr); */
-   retVal = -1;
-   lo = 0;
-   hi = n_shadows-1;
-   while (True) {
-      PROF_EVENT(71);
-
-      /* invariant: current unsearched space is from lo to hi,
-         inclusive. */
-      if (lo > hi) break; /* not found */
-
-      mid      = (lo + hi) / 2;
-      a_mid_lo = shadows[mid]->data;
-      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
-
-      if (ptr < a_mid_lo) {
-         hi = mid-1;
-         continue;
-      } 
-      if (ptr > a_mid_hi) {
-         lo = mid+1;
-         continue;
-      }
-      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
-      retVal = mid;
-      break;
-   }
-
-#  ifdef VG_DEBUG_LEAKCHECK
-   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
-#  endif
-   /* VG_(printf)("%d\n", retVal); */
-   return retVal;
-}
-
-
-
-static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
-{
-   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
-                      9841, 29524, 88573, 265720,
-                      797161, 2391484 };
-   Int          lo = 0;
-   Int          hi = n_shadows-1;
-   Int          i, j, h, bigN, hp;
-   ShadowChunk* v;
-
-   PROF_EVENT(72);
-   bigN = hi - lo + 1; if (bigN < 2) return;
-   hp = 0; while (incs[hp] < bigN) hp++; hp--;
-
-   for (; hp >= 0; hp--) {
-      PROF_EVENT(73);
-      h = incs[hp];
-      i = lo + h;
-      while (1) {
-         PROF_EVENT(74);
-         if (i > hi) break;
-         v = shadows[i];
-         j = i;
-         while (shadows[j-h]->data > v->data) {
-            PROF_EVENT(75);
-            shadows[j] = shadows[j-h];
-            j = j - h;
-            if (j <= (lo + h - 1)) break;
-         }
-         shadows[j] = v;
-         i++;
-      }
-   }
-}
-
-/* Globals, for the callback used by VG_(detect_memory_leaks). */
-
-static ShadowChunk** vglc_shadows;
-static Int           vglc_n_shadows;
-static Reachedness*  vglc_reachedness;
-static Addr          vglc_min_mallocd_addr;
-static Addr          vglc_max_mallocd_addr;
-
-static 
-void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
-{
-   Int  sh_no;
-   Addr ptr;
-
-   /* Rule out some known causes of bogus pointers.  Mostly these do
-      not cause much trouble because only a few false pointers can
-      ever lurk in these places.  This mainly stops it reporting that
-      blocks are still reachable in stupid test programs like this
-
-         int main (void) { char* a = malloc(100); return 0; }
-
-      which people seem inordinately fond of writing, for some reason.  
-
-      Note that this is a complete kludge.  It would be better to
-      ignore any addresses corresponding to valgrind.so's .bss and
-      .data segments, but I cannot think of a reliable way to identify
-      where the .bss segment has been put.  If you can, drop me a
-      line.  
-   */
-   if (a >= ((Addr)(&VG_(stack)))
-       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack))) {
-      return;
-   }
-   if (a >= ((Addr)(&VG_(m_state_static)))
-       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static))) {
-      return;
-   }
-   if (a == (Addr)(&vglc_min_mallocd_addr))
-      return;
-   if (a == (Addr)(&vglc_max_mallocd_addr))
-      return;
-
-   /* OK, let's get on and do something Useful for a change. */
-
-   ptr = (Addr)word_at_a;
-   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
-      /* Might be legitimate; we'll have to investigate further. */
-      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
-      if (sh_no != -1) {
-         /* Found a block at/into which ptr points. */
-         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
-         vg_assert(ptr < vglc_shadows[sh_no]->data 
-                         + vglc_shadows[sh_no]->size);
-         /* Decide whether Proper-ly or Interior-ly reached. */
-         if (ptr == vglc_shadows[sh_no]->data) {
-            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
-            vglc_reachedness[sh_no] = Proper;
-         } else {
-            if (vglc_reachedness[sh_no] == Unreached)
-               vglc_reachedness[sh_no] = Interior;
-         }
-      }
-   }
-}
-
-
-void VG_(detect_memory_leaks) ( void )
-{
-   Int    i;
-   Int    blocks_leaked, bytes_leaked;
-   Int    blocks_dubious, bytes_dubious;
-   Int    blocks_reachable, bytes_reachable;
-   Int    n_lossrecords;
-   UInt   bytes_notified;
-   
-   LossRecord*  errlist;
-   LossRecord*  p;
-
-   Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* );
-   PROF_EVENT(76);
-   vg_assert(VG_(clo_instrument));
-
-   /* Decide how closely we want to match ExeContexts in leak
-      records. */
-   switch (VG_(clo_leak_resolution)) {
-      case 2: 
-         ec_comparer_fn = VG_(eq_ExeContext_top2); 
-         break;
-      case 4: 
-         ec_comparer_fn = VG_(eq_ExeContext_top4); 
-         break;
-      case VG_DEEPEST_BACKTRACE: 
-         ec_comparer_fn = VG_(eq_ExeContext_all); 
-         break;
-      default: 
-         VG_(panic)("VG_(detect_memory_leaks): "
-                    "bad VG_(clo_leak_resolution)");
-         break;
-   }
-
-   /* vg_get_malloc_shadows allocates storage for shadows */
-   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
-   if (vglc_n_shadows == 0) {
-      vg_assert(vglc_shadows == NULL);
-      VG_(message)(Vg_UserMsg, 
-                   "No malloc'd blocks -- no leaks are possible.\n");
-      return;
-   }
-
-   VG_(message)(Vg_UserMsg, 
-                "searching for pointers to %d not-freed blocks.", 
-                vglc_n_shadows );
-   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
-
-   /* Sanity check; assert that the blocks are now in order and that
-      they don't overlap. */
-   for (i = 0; i < vglc_n_shadows-1; i++) {
-      vg_assert( ((Addr)vglc_shadows[i]->data)
-                 < ((Addr)vglc_shadows[i+1]->data) );
-      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
-                 < ((Addr)vglc_shadows[i+1]->data) );
-   }
-
-   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
-   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
-                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
-
-   vglc_reachedness 
-      = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) );
-   for (i = 0; i < vglc_n_shadows; i++)
-      vglc_reachedness[i] = Unreached;
-
-   /* Do the scan of memory. */
-   bytes_notified
-       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
-         * VKI_BYTES_PER_WORD;
-
-   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
-
-   blocks_leaked    = bytes_leaked    = 0;
-   blocks_dubious   = bytes_dubious   = 0;
-   blocks_reachable = bytes_reachable = 0;
-
-   for (i = 0; i < vglc_n_shadows; i++) {
-      if (vglc_reachedness[i] == Unreached) {
-         blocks_leaked++;
-         bytes_leaked += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Interior) {
-         blocks_dubious++;
-         bytes_dubious += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Proper) {
-         blocks_reachable++;
-         bytes_reachable += vglc_shadows[i]->size;
-      }
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-
-
-   /* Common up the lost blocks so we can print sensible error
-      messages. */
-
-   n_lossrecords = 0;
-   errlist       = NULL;
-   for (i = 0; i < vglc_n_shadows; i++) {
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->loss_mode == vglc_reachedness[i]
-             && ec_comparer_fn (
-                   p->allocated_at, 
-                   vglc_shadows[i]->where) ) {
-            break;
-	 }
-      }
-      if (p != NULL) {
-         p->num_blocks  ++;
-         p->total_bytes += vglc_shadows[i]->size;
-      } else {
-         n_lossrecords ++;
-         p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord));
-         p->loss_mode    = vglc_reachedness[i];
-         p->allocated_at = vglc_shadows[i]->where;
-         p->total_bytes  = vglc_shadows[i]->size;
-         p->num_blocks   = 1;
-         p->next         = errlist;
-         errlist         = p;
-      }
-   }
-   
-   for (i = 0; i < n_lossrecords; i++) {
-      LossRecord* p_min = NULL;
-      UInt        n_min = 0xFFFFFFFF;
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->num_blocks > 0 && p->total_bytes < n_min) {
-            n_min = p->total_bytes;
-            p_min = p;
-         }
-      }
-      vg_assert(p_min != NULL);
-
-      if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) {
-         p_min->num_blocks = 0;
-         continue;
-      }
-
-      VG_(message)(Vg_UserMsg, "");
-      VG_(message)(
-         Vg_UserMsg,
-         "%d bytes in %d blocks are %s in loss record %d of %d",
-         p_min->total_bytes, p_min->num_blocks,
-         p_min->loss_mode==Unreached ? "definitely lost" :
-            (p_min->loss_mode==Interior ? "possibly lost"
-                                        : "still reachable"),
-         i+1, n_lossrecords
-      );
-      VG_(pp_ExeContext)(p_min->allocated_at);
-      p_min->num_blocks = 0;
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
-   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-   if (!VG_(clo_show_reachable)) {
-      VG_(message)(Vg_UserMsg, 
-         "Reachable blocks (those to which a pointer was found) are not shown.");
-      VG_(message)(Vg_UserMsg, 
-         "To see them, rerun with: --show-reachable=yes");
-   }
-   VG_(message)(Vg_UserMsg, "");
-
-   VG_(free) ( VG_AR_PRIVATE, vglc_shadows );
-   VG_(free) ( VG_AR_PRIVATE, vglc_reachedness );
-}
-
-
-/* ---------------------------------------------------------------------
-   Sanity check machinery (permanently engaged).
-   ------------------------------------------------------------------ */
-
-/* Check that nobody has spuriously claimed that the first or last 16
-   pages (64 KB) of address space have become accessible.  Failure of
-   the following do not per se indicate an internal consistency
-   problem, but they are so likely to that we really want to know
-   about it if so. */
-
-Bool VG_(first_and_last_secondaries_look_plausible) ( void )
-{
-   if (IS_DISTINGUISHED_SM(VG_(primary_map)[0])
-       && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) {
-      return True;
-   } else {
-      return False;
-   }
-}
-
-
-/* A fast sanity check -- suitable for calling circa once per
-   millisecond. */
-
-void VG_(do_sanity_checks) ( Bool force_expensive )
-{
-   Int          i;
-   Bool         do_expensive_checks;
-
-   if (VG_(sanity_level) < 1) return;
-
-   /* --- First do all the tests that we can do quickly. ---*/
-
-   VG_(sanity_fast_count)++;
-
-   /* Check that we haven't overrun our private stack. */
-   for (i = 0; i < 10; i++) {
-      vg_assert(VG_(stack)[i]
-                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
-      vg_assert(VG_(stack)[10000-1-i] 
-                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
-   }
-
-   /* Check stuff pertaining to the memory check system. */
-
-   if (VG_(clo_instrument)) {
-
-      /* Check that nobody has spuriously claimed that the first or
-         last 16 pages of memory have become accessible [...] */
-      vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   }
-
-   /* --- Now some more expensive checks. ---*/
-
-   /* Once every 25 times, check some more expensive stuff. */
-
-   do_expensive_checks = False;
-   if (force_expensive) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) > 1) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) == 1 
-       && (VG_(sanity_fast_count) % 25) == 0)
-      do_expensive_checks = True;
-
-   if (do_expensive_checks) {
-      VG_(sanity_slow_count)++;
-
-#     if 0
-      { void zzzmemscan(void); zzzmemscan(); }
-#     endif
-
-      if ((VG_(sanity_fast_count) % 250) == 0)
-         VG_(sanity_check_tc_tt)();
-
-      if (VG_(clo_instrument)) {
-         /* Make sure nobody changed the distinguished secondary. */
-         for (i = 0; i < 8192; i++)
-            vg_assert(vg_distinguished_secondary_map.abits[i] 
-                      == VGM_BYTE_INVALID);
-         for (i = 0; i < 65536; i++)
-            vg_assert(vg_distinguished_secondary_map.vbyte[i] 
-                      == VGM_BYTE_INVALID);
-
-         /* Make sure that the upper 3/4 of the primary map hasn't
-            been messed with. */
-         for (i = 65536; i < 262144; i++)
-            vg_assert(VG_(primary_map)[i] 
-                      == & vg_distinguished_secondary_map);
-      }
-      /* 
-      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
-      */
-   }
-
-   if (VG_(sanity_level) > 1) {
-      /* Check sanity of the low-level memory manager.  Note that bugs
-         in the client's code can cause this to fail, so we don't do
-         this check unless specially asked for.  And because it's
-         potentially very expensive. */
-      VG_(mallocSanityCheckAll)();
-   }
-}
-
-
-/* ---------------------------------------------------------------------
-   Debugging machinery (turn on to debug).  Something of a mess.
-   ------------------------------------------------------------------ */
-
-/* Print the value tags on the 8 integer registers & flag reg. */
-
-static void uint_to_bits ( UInt x, Char* str )
-{
-   Int i;
-   Int w = 0;
-   /* str must point to a space of at least 36 bytes. */
-   for (i = 31; i >= 0; i--) {
-      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
-      if (i == 24 || i == 16 || i == 8)
-         str[w++] = ' ';
-   }
-   str[w++] = 0;
-   vg_assert(w == 36);
-}
-
-/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
-   state table. */
-
-void VG_(show_reg_tags) ( void )
-{
-   Char buf1[36];
-   Char buf2[36];
-   UInt z_eax, z_ebx, z_ecx, z_edx, 
-        z_esi, z_edi, z_ebp, z_esp, z_eflags;
-
-   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
-   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
-   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
-   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
-   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
-   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
-   
-   uint_to_bits(z_eflags, buf1);
-   VG_(message)(Vg_DebugMsg, "efl %\n", buf1);
-
-   uint_to_bits(z_eax, buf1);
-   uint_to_bits(z_ebx, buf2);
-   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
-
-   uint_to_bits(z_ecx, buf1);
-   uint_to_bits(z_edx, buf2);
-   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
-
-   uint_to_bits(z_esi, buf1);
-   uint_to_bits(z_edi, buf2);
-   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
-
-   uint_to_bits(z_ebp, buf1);
-   uint_to_bits(z_esp, buf2);
-   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
-}
-
-
-#if 0
-/* For debugging only.  Scan the address space and touch all allegedly
-   addressible words.  Useful for establishing where Valgrind's idea of
-   addressibility has diverged from what the kernel believes. */
-
-static 
-void zzzmemscan_notify_word ( Addr a, UInt w )
-{
-}
-
-void zzzmemscan ( void )
-{
-   Int n_notifies
-      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
-   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
-}
-#endif
-
-
-
-
-#if 0
-static Int zzz = 0;
-
-void show_bb ( Addr eip_next )
-{
-   VG_(printf)("[%4d] ", zzz);
-   VG_(show_reg_tags)( &VG_(m_shadow );
-   VG_(translate) ( eip_next, NULL, NULL, NULL );
-}
-#endif /* 0 */
-
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
+
diff --git a/coregrind/vg_messages.c b/coregrind/vg_messages.c
index 3eaf8cd..b0051bd 100644
--- a/coregrind/vg_messages.c
+++ b/coregrind/vg_messages.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index e32aee8..3fe6032 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -177,7 +177,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW));
@@ -188,7 +188,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] &= ~(1 << (signum % VKI_KNSIG_BPW));
@@ -199,7 +199,7 @@
 {
    if (set == NULL)
       return 0;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return 0;
    signum--;
    if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW)))
@@ -675,22 +675,49 @@
 }
 
 
-Long VG_(atoll36) ( Char* str )
+Long VG_(atoll16) ( Char* str )
 {
    Bool neg = False;
    Long n = 0;
    if (*str == '-') { str++; neg = True; };
    while (True) {
       if (*str >= '0' && *str <= '9') {
-         n = 36*n + (Long)(*str - '0');
+         n = 16*n + (Long)(*str - '0');
       }
       else 
-      if (*str >= 'A' && *str <= 'Z') {
-         n = 36*n + (Long)((*str - 'A') + 10);
+      if (*str >= 'A' && *str <= 'F') {
+         n = 16*n + (Long)((*str - 'A') + 10);
       }
       else 
-      if (*str >= 'a' && *str <= 'z') {
-         n = 36*n + (Long)((*str - 'a') + 10);
+      if (*str >= 'a' && *str <= 'f') {
+         n = 16*n + (Long)((*str - 'a') + 10);
+      }
+      else {
+	break;
+      }
+      str++;
+   }
+   if (neg) n = -n;
+   return n;
+}
+
+Long VG_(atoll36) ( UInt base, Char* str )
+{
+   Bool neg = False;
+   Long n = 0;
+   vg_assert(base >= 2 && base <= 36);
+   if (*str == '-') { str++; neg = True; };
+   while (True) {
+      if (*str >= '0' && *str <=('9' - (10 - base))) {
+         n = base*n + (Long)(*str - '0');
+      }
+      else 
+      if (base > 10 && *str >= 'A' && *str <= ('Z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'A') + 10);
+      }
+      else 
+      if (base > 10 && *str >= 'a' && *str <= ('z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'a') + 10);
       }
       else {
 	break;
@@ -763,9 +790,18 @@
 }
 
 
-void VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
+Char* VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
 {
-   VG_(strncpy_safely)( dest, src, ndest+1 ); 
+   Int i = 0;
+   while (True) {
+      if (i >= ndest) return dest;     /* reached limit */
+      dest[i] = src[i];
+      if (src[i++] == 0) {
+         /* reached NUL;  pad rest with zeroes as required */
+         while (i < ndest) dest[i++] = 0;
+         return dest;
+      }
+   }
 }
 
 
@@ -868,16 +904,22 @@
 }
 
 
-Char* VG_(strdup) ( ArenaId aid, const Char* s )
+/* Inline just for the wrapper VG_(strdup) below */
+__inline__ Char* VG_(arena_strdup) ( ArenaId aid, const Char* s )
 {
-    Int   i;
-    Int   len = VG_(strlen)(s) + 1;
-    Char* res = VG_(malloc) (aid, len);
-    for (i = 0; i < len; i++)
-       res[i] = s[i];
-    return res;
+   Int   i;
+   Int   len = VG_(strlen)(s) + 1;
+   Char* res = VG_(arena_malloc) (aid, len);
+   for (i = 0; i < len; i++)
+      res[i] = s[i];
+   return res;
 }
 
+/* Wrapper to avoid exposing skins to ArenaId's */
+Char* VG_(strdup) ( const Char* s )
+{
+   return VG_(arena_strdup) ( VG_AR_SKIN, s ); 
+}
 
 /* ---------------------------------------------------------------------
    A simple string matching routine, purloined from Hugs98.
@@ -966,66 +1008,32 @@
    VG_(exit)(1);
 }
 
+void VG_(skin_error) ( Char* str )
+{
+   VG_(printf)("\n%s: misconfigured skin:\n   %s\n\n", VG_(needs).name, str);
+   //VG_(printf)("Please report this bug to me at: %s\n\n", VG_EMAIL_ADDR);
+   VG_(shutdown_logging)();
+   VG_(exit)(1);
+}
+
 
 /* ---------------------------------------------------------------------
    Primitive support for reading files.
    ------------------------------------------------------------------ */
 
 /* Returns -1 on failure. */
-Int VG_(open_read) ( Char* pathname )
-{
+Int VG_(open) ( const Char* pathname, Int flags, Int mode )
+{  
    Int fd;
-   /* VG_(printf)("vg_open_read %s\n", pathname ); */
 
+   /* (old comment, not sure if it still applies  NJN 2002-sep-09) */
    /* This gets a segmentation fault if pathname isn't a valid file.
       I don't know why.  It seems like the call to open is getting
       intercepted and messed with by glibc ... */
    /* fd = open( pathname, O_RDONLY ); */
    /* ... so we go direct to the horse's mouth, which seems to work
       ok: */
-   const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) fd = -1;
-   return fd;
-}
-
-/* Returns -1 on failure. */
-static Int VG_(chmod_u_rw) ( Int fd )
-{
-   Int res;
-   const int O_IRUSR_IWUSR = 000600; /* See /usr/include/cpio.h */
-   res = vg_do_syscall2(__NR_fchmod, fd, O_IRUSR_IWUSR);
-   if (VG_(is_kerror)(res)) res = -1;
-   return res;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(create_and_write) ( Char* pathname )
-{
-   Int fd;
-
-   const int O_CR_AND_WR_ONLY = 0101; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_CR_AND_WR_ONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) {
-      fd = -1;
-   } else {
-      VG_(chmod_u_rw)(fd);
-      if (VG_(is_kerror)(fd)) {
-         fd = -1;
-      }
-   }
-   return fd;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(open_write) ( Char* pathname )
-{  
-   Int fd;
-
-   const int O_WRONLY_AND_TRUNC = 01001; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_WRONLY_AND_TRUNC, 0);
+   fd = vg_do_syscall3(__NR_open, (UInt)pathname, flags, mode);
    /* VG_(printf)("result = %d\n", fd); */
    if (VG_(is_kerror)(fd)) {
       fd = -1;
@@ -1068,7 +1076,7 @@
 /* Misc functions looking for a proper home. */
 
 /* We do getenv without libc's help by snooping around in
-   VG_(client_env) as determined at startup time. */
+   VG_(client_envp) as determined at startup time. */
 Char* VG_(getenv) ( Char* varname )
 {
    Int i, n;
@@ -1266,11 +1274,40 @@
             tot_alloc, nBytes, p, ((char*)p) + nBytes - 1, who );
       return p;
    }
-   VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", 
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): request for %d bytes failed.\n", 
                nBytes);
-   VG_(panic)("vg_get_memory_from_mmap: out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("VG_(get_memory_from_mmap): %d bytes already allocated.\n", 
+               tot_alloc);
+   VG_(printf)("\n");
+   VG_(printf)("This may mean that you have run out of swap space,\n");
+   VG_(printf)("since running programs on valgrind increases their memory\n");
+   VG_(printf)("usage at least 3 times.  You might want to use 'top'\n");
+   VG_(printf)("to determine whether you really have run out of swap.\n");
+   VG_(printf)("If so, you may be able to work around it by adding a\n");
+   VG_(printf)("temporary swap file -- this is easier than finding a\n");
+   VG_(printf)("new swap partition.  Go ask your sysadmin(s) [politely!]\n");
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("\n");
+   VG_(exit)(1);
 }
 
+/* ---------------------------------------------------------------------
+   Generally useful...
+   ------------------------------------------------------------------ */
+
+Int VG_(log2) ( Int x ) 
+{
+   Int i;
+   /* Any more than 32 and we overflow anyway... */
+   for (i = 0; i < 32; i++) {
+      if (1 << i == x) return i;
+   }
+   return -1;
+}
+
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_mylibc.c ---*/
diff --git a/coregrind/vg_procselfmaps.c b/coregrind/vg_procselfmaps.c
index ceba7b3..840f34b 100644
--- a/coregrind/vg_procselfmaps.c
+++ b/coregrind/vg_procselfmaps.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -102,7 +102,7 @@
    UChar  rr, ww, xx, pp, ch;
 
    /* Read the initial memory mapping from the /proc filesystem. */
-   fd = VG_(open_read) ( "/proc/self/maps" );
+   fd = VG_(open) ( "/proc/self/maps", VKI_O_RDONLY, 0 );
    if (fd == -1) {
       VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps");
       VG_(exit)(1);
@@ -172,6 +172,7 @@
        VG_(exit)(1);
 
     read_line_ok:
+
       /* Try and find the name of the file mapped to this segment, if
          it exists. */
       while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++;
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 0ad56b1..b65426b 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -25,13 +25,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
-                         VG_USERREQ__DO_LEAK_CHECK */
+#include "valgrind.h" /* for VG_USERREQ__RUNNING_ON_VALGRIND and
+                             VG_USERREQ__DISCARD_TRANSLATIONS */
 
 /* BORKAGE/ISSUES as of 29 May 02
 
@@ -126,6 +125,10 @@
          happens, this entire record is marked as no longer in use, by
          making the fd field be -1.  */
       Bool     ready; 
+
+      /* The result from SK_(pre_blocking_syscall)();  is passed to
+       * SK_(post_blocking_syscall)(). */
+      void*    pre_result;
    }
    VgWaitedOnFd;
 
@@ -149,12 +152,13 @@
 typedef UInt ThreadKey;
 
 
+UInt VG_(written_shadow_reg);
+
 /* Forwards */
 static void do_client_request ( ThreadId tid );
 static void scheduler_sanity ( void );
 static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );
 
-
 /* ---------------------------------------------------------------------
    Helper functions for the scheduler.
    ------------------------------------------------------------------ */
@@ -181,11 +185,12 @@
 
 
 /* For constructing error messages only: try and identify a thread
-   whose stack this address currently falls within, or return
-   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
-   with any currently VG_(baseBlock)-resident thread. 
+   whose stack satisfies the predicate p, or return VG_INVALID_THREADID
+   if none do.  A small complication is dealing with any currently
+   VG_(baseBlock)-resident thread. 
 */
-ThreadId VG_(identify_stack_addr)( Addr a )
+ThreadId VG_(any_matching_thread_stack)
+              ( Bool (*p) ( Addr stack_min, Addr stack_max ))
 {
    ThreadId tid, tid_to_skip;
 
@@ -195,8 +200,8 @@
       VG_(baseBlock). */
    if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
       tid = vg_tid_currently_in_baseBlock;
-      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
-          && a <= VG_(threads)[tid].stack_highest_word) 
+      if ( p ( VG_(baseBlock)[VGOFF_(m_esp)], 
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
       else
          tid_to_skip = tid;
@@ -205,8 +210,8 @@
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       if (VG_(threads)[tid].status == VgTs_Empty) continue;
       if (tid == tid_to_skip) continue;
-      if (VG_(threads)[tid].m_esp <= a 
-          && a <= VG_(threads)[tid].stack_highest_word)
+      if ( p ( VG_(threads)[tid].m_esp,
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
    }
    return VG_INVALID_THREADID;
@@ -238,14 +243,16 @@
                   VG_(threads)[i].associated_mx,
                   VG_(threads)[i].associated_cv );
       VG_(pp_ExeContext)( 
-         VG_(get_ExeContext)( False, VG_(threads)[i].m_eip, 
-                                     VG_(threads)[i].m_ebp ));
+         VG_(get_ExeContext2)( VG_(threads)[i].m_eip, VG_(threads)[i].m_ebp,
+                               VG_(threads)[i].m_esp, 
+                               VG_(threads)[i].stack_highest_word)
+      );
    }
    VG_(printf)("\n");
 }
 
 static
-void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
+void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no, void* pre_res )
 {
    Int i;
 
@@ -265,6 +272,7 @@
    vg_waiting_fds[i].tid        = tid;
    vg_waiting_fds[i].ready      = False;
    vg_waiting_fds[i].syscall_no = syscall_no;
+   vg_waiting_fds[i].pre_result = pre_res;
 }
 
 
@@ -325,7 +333,7 @@
                        ( trans_addr, trans_size );
    tte.mru_epoch  = VG_(current_epoch);
    /* Free the intermediary -- was allocated by VG_(emit_code). */
-   VG_(jitfree)( (void*)trans_addr );
+   VG_(arena_free)( VG_AR_JITTER, (void*)trans_addr );
    /* Add to trans tab and set back pointer. */
    VG_(add_to_trans_tab) ( &tte );
    /* Update stats. */
@@ -353,6 +361,11 @@
    /*NOTREACHED*/
 }
 
+ThreadState* VG_(get_ThreadState)( ThreadId tid )
+{
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   return & VG_(threads)[tid];
+}
 
 ThreadState* VG_(get_current_thread_state) ( void )
 {
@@ -367,6 +380,15 @@
    return vg_tid_currently_in_baseBlock;
 }
 
+ThreadId VG_(get_current_tid_1_if_root) ( void )
+{
+   if (0 == vg_tid_currently_in_baseBlock)
+      return 1;     /* root thread */
+    
+   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
+   return vg_tid_currently_in_baseBlock;
+}
+
 
 /* Copy the saved state of a thread into VG_(baseBlock), ready for it
    to be run. */
@@ -390,15 +412,31 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
 
-   VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
-   VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
-   VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
-   VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
-   VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
-   VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
-   VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
-   VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   if (VG_(needs).shadow_regs) {
+      VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
+      VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
+      VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
+      VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
+      VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
+      VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
+      VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
+      VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   } else {
+      /* Fields shouldn't be used -- check their values haven't changed. */
+      /* Nb: they are written to by some macros like SET_EDX, but they
+       *     should just write VG_UNUSED_SHADOW_REG_VALUE. */
+      vg_assert(
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eax &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ecx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eflags);
+   }
 
    vg_tid_currently_in_baseBlock = tid;
 }
@@ -432,15 +470,28 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
 
-   VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
-   VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
-   VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
-   VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
-   VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
-   VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   if (VG_(needs).shadow_regs) {
+      VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
+      VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
+      VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
+      VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
+      VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
+      VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
+      VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
+      VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
+      VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   } else {
+      /* Fill with recognisable junk */
+      VG_(threads)[tid].sh_eax =
+      VG_(threads)[tid].sh_ebx =
+      VG_(threads)[tid].sh_ecx =
+      VG_(threads)[tid].sh_edx =
+      VG_(threads)[tid].sh_esi =
+      VG_(threads)[tid].sh_edi =
+      VG_(threads)[tid].sh_ebp =
+      VG_(threads)[tid].sh_esp = 
+      VG_(threads)[tid].sh_eflags = VG_UNUSED_SHADOW_REG_VALUE;
+   }
 
    /* Fill it up with junk. */
    VG_(baseBlock)[VGOFF_(m_eax)] = junk;
@@ -491,7 +542,7 @@
    vg_assert(!VG_(scheduler_jmpbuf_valid));
 
    VG_(save_thread_state) ( tid );
-   VGP_POPCC;
+   VGP_POPCC(VgpRun);
    return trc;
 }
 
@@ -566,14 +617,18 @@
 
    if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1)
        || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2) 
-       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)) {
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_4)) {
       /* Jolly good! */
    } else {
-      VG_(printf)("%%esp at startup = %p is not near %p, %p or %p; aborting\n", 
-                  (void*)startup_esp, 
-                  (void*)VG_STARTUP_STACK_BASE_1,
-                  (void*)VG_STARTUP_STACK_BASE_2,
-                  (void*)VG_STARTUP_STACK_BASE_3 );
+      VG_(printf)(
+         "%%esp at startup = %p is not near %p, %p, %p or %p; aborting\n", 
+         (void*)startup_esp, 
+         (void*)VG_STARTUP_STACK_BASE_1,
+         (void*)VG_STARTUP_STACK_BASE_2,
+         (void*)VG_STARTUP_STACK_BASE_3,
+         (void*)VG_STARTUP_STACK_BASE_4 
+      );
       VG_(panic)("unexpected %esp at startup");
    }
 
@@ -751,11 +806,12 @@
 static
 void sched_do_syscall ( ThreadId tid )
 {
-   UInt saved_eax;
-   UInt res, syscall_no;
-   UInt fd;
-   Bool orig_fd_blockness;
-   Char msg_buf[100];
+   UInt  saved_eax;
+   UInt  res, syscall_no;
+   UInt  fd;
+   void* pre_res;
+   Bool  orig_fd_blockness;
+   Char  msg_buf[100];
 
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
@@ -797,12 +853,13 @@
 
    /* Deal with error case immediately. */
    if (!fd_is_valid(fd)) {
-      VG_(message)(Vg_UserMsg, 
-         "Warning: invalid file descriptor %d in syscall %s",
-         fd, syscall_no == __NR_read ? "read()" : "write()" );
-      VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+            "Warning: invalid file descriptor %d in syscall %s",
+            fd, syscall_no == __NR_read ? "read()" : "write()" );
+      pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
       KERNEL_DO_SYSCALL(tid, res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
       return;
@@ -813,7 +870,7 @@
    orig_fd_blockness = fd_is_blockful(fd);
    set_fd_nonblocking(fd);
    vg_assert(!fd_is_blockful(fd));
-   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+   pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
 
    /* This trashes the thread's %eax; we have to preserve it. */
    saved_eax = VG_(threads)[tid].m_eax;
@@ -834,7 +891,7 @@
              the I/O completion -- the client is.  So don't file a 
              completion-wait entry. 
       */
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
@@ -848,7 +905,8 @@
       /* Put this fd in a table of fds on which we are waiting for
          completion. The arguments for select() later are constructed
          from this table.  */
-      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
+      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */,
+                     pre_res);
       /* Deschedule thread until an I/O completion happens. */
       VG_(threads)[tid].status = VgTs_WaitFD;
       if (VG_(clo_trace_sched)) {
@@ -1042,6 +1100,7 @@
 void complete_blocked_syscalls ( void )
 {
    Int      fd, i, res, syscall_no;
+   void*    pre_res;
    ThreadId tid;
    Char     msg_buf[100];
 
@@ -1069,6 +1128,8 @@
       syscall_no = vg_waiting_fds[i].syscall_no;
       vg_assert(syscall_no == VG_(threads)[tid].m_eax);
 
+      pre_res = vg_waiting_fds[i].pre_result;
+
       /* In a rare case pertaining to writing into a pipe, write()
          will block when asked to write > 4096 bytes even though the
          kernel claims, when asked via select(), that blocking will
@@ -1086,7 +1147,7 @@
       }
 
       KERNEL_DO_SYSCALL(tid,res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
 
       /* Reschedule. */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1125,7 +1186,7 @@
    struct vki_timespec req;
    struct vki_timespec rem;
    req.tv_sec = 0;
-   req.tv_nsec = 20 * 1000 * 1000;
+   req.tv_nsec = 10 * 1000 * 1000;
    res = VG_(nanosleep)( &req, &rem );   
    vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
 }
@@ -1266,6 +1327,8 @@
       if (0)
          VG_(printf)("SCHED: tid %d\n", tid);
 
+      VG_TRACK( thread_run, tid );
+
       /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
          that it decrements the counter before testing it for zero, so
          that if VG_(dispatch_ctr) is set to N you get at most N-1
@@ -1326,7 +1389,8 @@
                = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
             if (trans_addr == (Addr)0) {
                /* Not found; we need to request a translation. */
-               create_translation_for( tid, VG_(threads)[tid].m_eip ); 
+               create_translation_for( 
+                  tid, VG_(threads)[tid].m_eip ); 
                trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip ); 
                if (trans_addr == (Addr)0)
                   VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
@@ -1382,8 +1446,13 @@
                If not valgrinding (cachegrinding, etc) don't do this.
                __libc_freeres does some invalid frees which crash
                the unprotected malloc/free system. */
+
+            /* If __NR_exit, remember the supplied argument. */
+            if (VG_(threads)[tid].m_eax == __NR_exit)
+               VG_(exitcode) = VG_(threads)[tid].m_ebx; /* syscall arg1 */
+
             if (VG_(threads)[tid].m_eax == __NR_exit 
-                && !VG_(clo_instrument)) {
+                && ! VG_(needs).run_libc_freeres) {
                if (VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; quitting");
@@ -1392,7 +1461,7 @@
             }
 
             if (VG_(threads)[tid].m_eax == __NR_exit) {
-               vg_assert(VG_(clo_instrument));
+               vg_assert(VG_(needs).run_libc_freeres);
                if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; running __libc_freeres()");
@@ -1574,10 +1643,10 @@
    vki_ksigset_t irrelevant_sigmask;
    vg_assert(VG_(is_valid_or_empty_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Empty);
-   /* Mark its stack no-access */
-   if (VG_(clo_instrument) && tid != 1)
-      VGM_(make_noaccess)( VG_(threads)[tid].stack_base,
-                           VG_(threads)[tid].stack_size );
+   /* Its stack is now off-limits */
+   VG_TRACK( die_mem_stack, VG_(threads)[tid].stack_base,
+                            VG_(threads)[tid].stack_size );
+
    /* Forget about any pending signals directed specifically at this
       thread, and get rid of signal handlers specifically arranged for
       this thread. */
@@ -1620,17 +1689,14 @@
       thread_return = VG_(threads)[jnr].joiner_thread_return;
       if (thread_return != NULL) {
          /* CHECK thread_return writable */
-         if (VG_(clo_instrument)
-             && !VGM_(check_writable)( (Addr)thread_return, 
-                                       sizeof(void*), NULL))
-            VG_(record_pthread_err)( jnr, 
-               "pthread_join: thread_return points to invalid location");
+         VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[jnr],
+                                  "pthread_join: thread_return",
+                                  (Addr)thread_return, sizeof(void*));
 
          *thread_return = VG_(threads)[jee].joinee_retval;
          /* Not really right, since it makes the thread's return value
             appear to be defined even if it isn't. */
-         if (VG_(clo_instrument))
-            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
+         VG_TRACK( post_mem_write, (Addr)thread_return, sizeof(void*) );
       }
 
       /* Joinee is discarded */
@@ -1716,8 +1782,8 @@
    }
    sp--;
    *cu = VG_(threads)[tid].custack[sp];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   // JJJ: no corresponding pre_mem_write check??
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    VG_(threads)[tid].custack_used = sp;
    SET_EDX(tid, 0);
 }
@@ -1884,7 +1950,7 @@
             "set_cancelpend for invalid tid %d", cee);
          print_sched_event(tid, msg_buf);
       }
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cancel: target thread does not exist, or invalid");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -1919,7 +1985,7 @@
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
    if (jee == tid) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: attempt to join to self");
       SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1935,7 +2001,7 @@
        || jee >= VG_N_THREADS
        || VG_(threads)[jee].status == VgTs_Empty) {
       /* Invalid thread to join to. */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: target thread does not exist, or invalid");
       SET_EDX(tid, EINVAL);
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1948,7 +2014,7 @@
       if (VG_(threads)[i].status == VgTs_WaitJoinee
           && VG_(threads)[i].joiner_jee_tid == jee) {
          /* Someone already did join on this thread */
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_join: another thread already "
             "in join-wait for target thread");
          SET_EDX(tid, EINVAL);
@@ -2074,33 +2140,39 @@
                      - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
    }
 
-   VG_(threads)[tid].m_esp 
-      = VG_(threads)[tid].stack_base 
-        + VG_(threads)[tid].stack_size
-        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
+   /* Having got memory to hold the thread's stack:
+      - set %esp as base + size
+      - mark everything below %esp inaccessible
+      - mark redzone at stack end inaccessible
+    */
+   VG_(threads)[tid].m_esp = VG_(threads)[tid].stack_base 
+                           + VG_(threads)[tid].stack_size
+                           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
 
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( VG_(threads)[tid].m_esp, 
-                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
+   VG_TRACK ( die_mem_stack, VG_(threads)[tid].stack_base, 
+                           + new_stk_szb - VG_AR_CLIENT_STACKBASE_REDZONE_SZB);
+   VG_TRACK ( ban_mem_stack, VG_(threads)[tid].m_esp, 
+                             VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
    
-   /* push arg */
-   VG_(threads)[tid].m_esp -= 4;
-   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
-
-   /* push (bogus) return address */
-   VG_(threads)[tid].m_esp -= 4;
+   /* push two args */
+   VG_(threads)[tid].m_esp -= 8;
+   VG_TRACK ( new_mem_stack, (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( pre_mem_write, Vg_CorePThread, & VG_(threads)[tid], 
+                             "new thread: stack",
+                             (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+ 
+   /* push arg and (bogus) return address */
+   * (UInt*)(VG_(threads)[tid].m_esp+4) = (UInt)arg;
    * (UInt*)(VG_(threads)[tid].m_esp) 
       = (UInt)&do__apply_in_new_thread_bogusRA;
 
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( post_mem_write, VG_(threads)[tid].m_esp, 2 * 4 );
 
    /* this is where we start */
    VG_(threads)[tid].m_eip = (UInt)fn;
 
    if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf,
-         "new thread, created by %d", parent_tid );
+      VG_(sprintf)(msg_buf, "new thread, created by %d", parent_tid );
       print_sched_event(tid, msg_buf);
    }
 
@@ -2230,7 +2302,7 @@
 
    /* POSIX doesn't mandate this, but for sanity ... */
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_lock/trylock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2250,7 +2322,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_lock/trylock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2304,6 +2376,9 @@
       /* We get it! [for the first time]. */
       mutex->__m_count = 1;
       mutex->__m_owner = (_pthread_descr)tid;
+
+      VG_TRACK( post_mutex_lock, tid, mutex);
+
       /* return 0 (success). */
       SET_EDX(tid, 0);
    }
@@ -2327,7 +2402,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2347,7 +2422,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_unlock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2356,7 +2431,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0) {
       /* nobody holds it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is not locked");
       SET_EDX(tid, EPERM);
       return;
@@ -2364,7 +2439,7 @@
 
    if ((ThreadId)mutex->__m_owner != tid) {
       /* we don't hold it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is locked by a different thread");
       SET_EDX(tid, EPERM);
       return;
@@ -2384,6 +2459,8 @@
    vg_assert(mutex->__m_count == 1);
    vg_assert((ThreadId)mutex->__m_owner == tid);
 
+   VG_TRACK( post_mutex_unlock, tid, mutex);
+
    /* Release at max one thread waiting on this mutex. */
    release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
 
@@ -2561,7 +2638,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL || cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_wait/timedwait: cond or mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2581,7 +2658,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2590,7 +2667,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0 /* nobody holds it */
        || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is unlocked "
             "or is locked but not owned by thread");
       SET_EDX(tid, EINVAL);
@@ -2636,7 +2713,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_signal/broadcast: cond is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2699,14 +2776,11 @@
    vg_thread_keys[i].destructor = destructor;
 
    /* check key for addressibility */
-   if (VG_(clo_instrument)
-       && !VGM_(check_writable)( (Addr)key, 
-                                 sizeof(pthread_key_t), NULL))
-      VG_(record_pthread_err)( tid, 
-         "pthread_key_create: key points to invalid location");
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid], 
+                            "pthread_key_create: key",
+                            (Addr)key, sizeof(pthread_key_t));
    *key = i;
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );
+   VG_TRACK( post_mem_write, (Addr)key, sizeof(pthread_key_t) );
 
    SET_EDX(tid, 0);
 }
@@ -2726,7 +2800,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
    
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_key_delete: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2760,7 +2834,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_getspecific: key is invalid");
       SET_EDX(tid, (UInt)NULL);
       return;
@@ -2786,7 +2860,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_setspecific: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2814,14 +2888,16 @@
    }
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(key >= 0 && key < VG_N_THREAD_KEYS);
+
+   // JJJ: no pre_mem_write check??
+   
    if (!vg_thread_keys[key].inuse) {
       SET_EDX(tid, -1);
       return;
    }
    cu->fn = vg_thread_keys[key].destructor;
    cu->arg = VG_(threads)[tid].specifics[key];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    SET_EDX(tid, 0);
 }
 
@@ -2852,27 +2928,19 @@
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
 
-   if (VG_(clo_instrument)) {
-      /* check newmask/oldmask are addressible/defined */
-      if (newmask
-          && !VGM_(check_readable)( (Addr)newmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: newmask contains "
-            "unaddressible or undefined bytes");
-      if (oldmask
-          && !VGM_(check_writable)( (Addr)oldmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: oldmask contains "
-            "unaddressible bytes");
-   }
+   if (newmask)
+      VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                              "pthread_sigmask: newmask",
+                              (Addr)newmask, sizeof(vki_ksigset_t));
+   if (oldmask)
+      VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                               "pthread_sigmask: oldmask",
+                               (Addr)oldmask, sizeof(vki_ksigset_t));
 
    VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask );
 
-   if (oldmask && VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)oldmask, sizeof(vki_ksigset_t) );
-   }
+   if (oldmask)
+      VG_TRACK( post_mem_write, (Addr)oldmask, sizeof(vki_ksigset_t) );
 
    /* Success. */
    SET_EDX(tid, 0);
@@ -2924,7 +2992,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!VG_(is_valid_tid)(thread)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_kill: invalid target thread");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -2994,18 +3062,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                           "pthread_atfork: prepare/parent/child",
+                           (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_readable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "pthread_atfork: prepare/parent/child contains "
-            "unaddressible or undefined bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3027,18 +3088,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                            "fork: prepare/parent/child",
+                            (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_writable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "fork: prepare/parent/child contains "
-            "unaddressible bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3046,9 +3100,7 @@
    *fh = vg_fhstack[n];
    SET_EDX(tid, 0);
 
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) );
-   }
+   VG_TRACK( post_mem_write, (Addr)fh, sizeof(ForkHandlerEntry) );
 }
 
 
@@ -3063,9 +3115,9 @@
 static
 void do_client_request ( ThreadId tid )
 {
-#  define RETURN_WITH(vvv)                        \
-       { tst->m_edx = (vvv);                      \
-         tst->sh_edx = VGM_WORD_VALID;            \
+#  define RETURN_WITH(vvv)                      \
+       { tst->m_edx = (vvv);                    \
+         tst->sh_edx = VG_(written_shadow_reg); \
        }
 
    ThreadState* tst    = &VG_(threads)[tid];
@@ -3289,7 +3341,7 @@
          break;
 
       case VG_USERREQ__PTHREAD_ERROR:
-         VG_(record_pthread_err)( tid, (Char*)(arg[1]) );
+         VG_(record_pthread_error)( tid, (Char*)(arg[1]) );
          SET_EDX(tid, 0);
          break;
 
@@ -3311,30 +3363,40 @@
                                      (ForkHandlerEntry*)(arg[2]) );
          break;
 
-      case VG_USERREQ__MAKE_NOACCESS:
-      case VG_USERREQ__MAKE_WRITABLE:
-      case VG_USERREQ__MAKE_READABLE:
-      case VG_USERREQ__DISCARD:
-      case VG_USERREQ__CHECK_WRITABLE:
-      case VG_USERREQ__CHECK_READABLE:
-      case VG_USERREQ__MAKE_NOACCESS_STACK:
-      case VG_USERREQ__DO_LEAK_CHECK:
-      case VG_USERREQ__DISCARD_TRANSLATIONS:
-         SET_EDX(
-            tid, 
-            VG_(handle_client_request) ( &VG_(threads)[tid], arg )
-         );
-	 break;
-
       case VG_USERREQ__SIGNAL_RETURNS: 
          handle_signal_return(tid);
 	 break;
 
+      /* Requests from the client program */
+
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
+         if (VG_(clo_verbosity) > 2)
+            VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
+                         " addr %p,  len %d\n",
+                         (void*)arg[1], arg[2] );
+
+         VG_(invalidate_translations)( arg[1], arg[2] );
+
+         SET_EDX( tid, 0 );     /* return value is meaningless */
+	 break;
+
       default:
-         VG_(printf)("panic'd on client request = 0x%x\n", arg[0] );
-         VG_(panic)("do_client_request: "
-                    "unknown request");
-         /*NOTREACHED*/
+         if (VG_(needs).client_requests) {
+            if (VG_(clo_verbosity) > 2)
+               VG_(printf)("client request: code %d,  addr %p,  len %d\n",
+                           arg[0], (void*)arg[1], arg[2] );
+
+            SET_EDX(tid,
+                    SK_(handle_client_request) ( &VG_(threads)[tid], arg )
+            );
+         } else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled client request: 0x%x.  Perhaps\n" 
+                        "  VG_(needs).client_requests should be set?\n",
+                        arg[0]);
+            VG_(panic)("do_client_request: unknown request");
+            /*NOTREACHED*/
+         }
          break;
    }
 
@@ -3392,7 +3454,7 @@
              && stack_used 
                 >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
             VG_(message)(Vg_UserMsg,
-               "Warning: STACK OVERFLOW: "
+               "Error: STACK OVERFLOW: "
                "thread %d: stack used %d, available %d", 
                i, stack_used, VG_PTHREAD_STACK_MIN );
             VG_(message)(Vg_UserMsg,
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index f58ec11..f849544 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -26,12 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
 #include "vg_include.h"
-#include "vg_constants.h"
 #include "vg_unsafe.h"
 #include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -598,16 +597,18 @@
    return;
 
   bad_signo:
-   VG_(message)(Vg_UserMsg,
-                "Warning: bad signal number %d in __NR_sigaction.", 
-                signo);
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: bad signal number %d in __NR_sigaction.", 
+                   signo);
    SET_EAX(tid, -VKI_EINVAL);
    return;
 
   bad_sigkill_or_sigstop:
-   VG_(message)(Vg_UserMsg,
-      "Warning: attempt to set %s handler in __NR_sigaction.", 
-      signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+         "Warning: attempt to set %s handler in __NR_sigaction.", 
+         signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
 
    SET_EAX(tid, -VKI_EINVAL);
    return;
@@ -939,11 +940,19 @@
    esp = esp_top_of_frame;
    esp -= sizeof(VgSigFrame);
    frame = (VgSigFrame*)esp;
+
+   /* For tracking memory events, indicate the entire frame has been
+    * allocated, but pretend that only the first four words are written */
+   VG_TRACK( new_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
+
    /* Assert that the frame is placed correctly. */
    vg_assert( (sizeof(VgSigFrame) & 0x3) == 0 );
    vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) 
               == ((Char*)(esp_top_of_frame)) );
 
+   /* retaddr, sigNo, psigInfo, puContext fields are to be written */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst, "signal handler frame", 
+                            (Addr)esp, 16 );
    frame->retaddr    = (UInt)(&VG_(signalreturn_bogusRA));
    frame->sigNo      = sigNo;
    frame->psigInfo   = (Addr)NULL;
@@ -974,14 +983,9 @@
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
-   /* Make retaddr, sigNo, psigInfo, puContext fields readable -- at
-      0(%ESP) .. 12(%ESP) */
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( ((Addr)esp)+0,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+4,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+8,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+12, 4 );
-   }
+   /* retaddr, sigNo, psigInfo, puContext fields have been written -- 
+      at 0(%ESP) .. 12(%ESP) */
+   VG_TRACK( post_mem_write, (Addr)esp, 16 );
 
    /* 
    VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", 
@@ -1021,8 +1025,7 @@
       tst->m_fpu[i] = frame->fpustate[i];
 
    /* Mark the frame structure as nonaccessible. */
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( (Addr)frame, sizeof(VgSigFrame) );
+   VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
 
    /* Restore machine state from the saved context. */
    tst->m_eax     = frame->eax;
@@ -1140,9 +1143,7 @@
          sigwait_args = (UInt*)(tst->m_eax);
          if (NULL != (UInt*)(sigwait_args[2])) {
             *(Int*)(sigwait_args[2]) = sigNo;
-            if (VG_(clo_instrument))
-               VGM_(make_readable)( (Addr)(sigwait_args[2]), 
-                                    sizeof(UInt));
+            VG_TRACK( post_mem_write, (Addr)sigwait_args[2], sizeof(UInt));
          }
 	 SET_EDX(tid, 0);
          tst->status = VgTs_Runnable;
@@ -1194,7 +1195,11 @@
             vg_dcss.dcss_sigpending[sigNo] = False;
             vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
             continue; /* for (sigNo = 1; ...) loop */
-	 }
+	 } else if (VG_(ksigismember)(&(tst->sig_mask), sigNo)) {
+            /* signal blocked in specific thread, so we can't
+               deliver it just now */
+            continue; /* for (sigNo = 1; ...) loop */
+         }
       } else {
          /* not directed to a specific thread, so search for a
             suitable candidate */
diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S
index 63ee590..d6c202e 100644
--- a/coregrind/vg_startup.S
+++ b/coregrind/vg_startup.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index 8330794..728f228 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -37,17 +37,12 @@
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
 
-   Stabs reader greatly improved by Nick Nethercode, Apr 02.
-
-   16 May 02: when notified about munmap, return a Bool indicating
-   whether or not the area being munmapped had executable permissions.
-   This is then used to determine whether or not
-   VG_(invalid_translations) should be called for that area.  In order
-   that this work even if --instrument=no, in this case we still keep
-   track of the mapped executable segments, but do not load any debug
-   info or symbols.
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 */
 
+/* Set to True when first debug info search is performed */
+Bool VG_(using_debug_info) = False;
+
 /*------------------------------------------------------------*/
 /*--- Structs n stuff                                      ---*/
 /*------------------------------------------------------------*/
@@ -126,23 +121,14 @@
    SegInfo;
 
 
-/* -- debug helper -- */
-static void ppSegInfo ( SegInfo* si )
-{
-   VG_(printf)("name: %s\n"
-               "start %p, size %d, foffset %d\n",
-               si->filename?si->filename : (UChar*)"NULL",
-               si->start, si->size, si->foffset );
-}
-
 static void freeSegInfo ( SegInfo* si )
 {
    vg_assert(si != NULL);
-   if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
-   if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
-   if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
-   if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
-   VG_(free)(VG_AR_SYMTAB, si);
+   if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
+   if (si->symtab)   VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
+   if (si->loctab)   VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
+   if (si->strtab)   VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
+   VG_(arena_free)(VG_AR_SYMTAB, si);
 }
 
 
@@ -151,23 +137,54 @@
 /*------------------------------------------------------------*/
 
 /* Add a str to the string table, including terminating zero, and
-   return offset of the string in vg_strtab. */
+   return offset of the string in vg_strtab.  Unless it's been seen
+   recently, in which case we find the old index and return that.
+   This avoids the most egregious duplications. */
 
 static __inline__
 Int addStr ( SegInfo* si, Char* str )
 {
+#  define EMPTY    0xffffffff
+#  define NN       5
+   
+   /* prevN[0] has the most recent, prevN[NN-1] the least recent */
+   static UInt     prevN[] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
+   static SegInfo* curr_si = NULL;
+
    Char* new_tab;
    Int   new_sz, i, space_needed;
-   
+
+   /* Avoid gratuitous duplication:  if we saw `str' within the last NN,
+    * within this segment, return that index.  Saves about 200KB in glibc,
+    * extra time taken is too small to measure.  --NJN 2002-Aug-30 */
+   if (curr_si == si) {
+      for (i = NN-1; i >= 0; i--) {
+         if (EMPTY != prevN[i] &&
+             (0 == VG_(strcmp)(str, &si->strtab[prevN[i]]))) {
+            return prevN[i];
+         }
+      }
+   } else {
+      /* New segment */
+      curr_si = si;
+      for (i = 0; i < 5; i++) prevN[i] = EMPTY;
+   }
+   /* Shuffle prevous ones along, put new one in. */
+   for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
+   prevN[0] = si->strtab_used;
+
+#  undef EMPTY
+
    space_needed = 1 + VG_(strlen)(str);
+
    if (si->strtab_used + space_needed > si->strtab_size) {
       new_sz = 2 * si->strtab_size;
       if (new_sz == 0) new_sz = 5000;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
       if (si->strtab != NULL) {
          for (i = 0; i < si->strtab_used; i++)
             new_tab[i] = si->strtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->strtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
       }
       si->strtab      = new_tab;
       si->strtab_size = new_sz;
@@ -178,6 +195,7 @@
 
    si->strtab_used += space_needed;
    vg_assert(si->strtab_used <= si->strtab_size);
+
    return si->strtab_used - space_needed;
 }
 
@@ -195,11 +213,11 @@
    if (si->symtab_used == si->symtab_size) {
       new_sz = 2 * si->symtab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
       if (si->symtab != NULL) {
          for (i = 0; i < si->symtab_used; i++)
             new_tab[i] = si->symtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->symtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
       }
       si->symtab = new_tab;
       si->symtab_size = new_sz;
@@ -224,11 +242,11 @@
    if (si->loctab_used == si->loctab_size) {
       new_sz = 2 * si->loctab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
       if (si->loctab != NULL) {
          for (i = 0; i < si->loctab_used; i++)
             new_tab[i] = si->loctab[i];
-         VG_(free)(VG_AR_SYMTAB, si->loctab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
       }
       si->loctab = new_tab;
       si->loctab_size = new_sz;
@@ -732,8 +750,7 @@
                      next_addr = (UInt)stab[i+1].n_value;
                      break;
 
-                  /* Boring one: skip, look for something more
-                     useful. */
+                  /* Boring one: skip, look for something more useful. */
                   case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC: 
                   case N_STSYM: case N_LCSYM: case N_GSYM:
                      i++;
@@ -1006,10 +1023,10 @@
       ++ state_machine_regs.last_file_entry;
       name = data;
       if (*fnames == NULL)
-        *fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+        *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
       else
-        *fnames = VG_(realloc)(
-                     VG_AR_SYMTAB, *fnames, 
+        *fnames = VG_(arena_realloc)(
+                     VG_AR_SYMTAB, *fnames, /*alignment*/4,
                      sizeof(UInt) 
                         * (state_machine_regs.last_file_entry + 1));
       (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
@@ -1136,9 +1153,9 @@
 		semantics, we need to malloc the first time. */
 
              if (fnames == NULL)
-               fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+               fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
              else
-               fnames = VG_(realloc)(VG_AR_SYMTAB, fnames, 
+               fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
                            sizeof(UInt) 
                               * (state_machine_regs.last_file_entry + 1));
              data += VG_(strlen) ((Char *) data) + 1;
@@ -1281,7 +1298,7 @@
              break;
            }
        }
-      VG_(free)(VG_AR_SYMTAB, fnames);
+      VG_(arena_free)(VG_AR_SYMTAB, fnames);
       fnames = NULL;
     }
 }
@@ -1327,7 +1344,7 @@
    }
    n_oimage = stat_buf.st_size;
 
-   fd = VG_(open_read)(si->filename);
+   fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
    if (fd == -1) {
       vg_symerr("Can't open .so/.exe to read symbols?!");
       return;
@@ -1650,8 +1667,7 @@
 static SegInfo* segInfo = NULL;
 
 
-static
-void read_symtab_callback ( 
+void VG_(read_symtab_callback) ( 
         Addr start, UInt size, 
         Char rr, Char ww, Char xx, 
         UInt foffset, UChar* filename )
@@ -1686,14 +1702,14 @@
    }
 
    /* Get the record initialised right. */
-   si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
+   si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
    si->next = segInfo;
    segInfo = si;
 
    si->start    = start;
    si->size     = size;
    si->foffset  = foffset;
-   si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
+   si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
    VG_(strcpy)(si->filename, filename);
 
    si->symtab = NULL;
@@ -1704,15 +1720,12 @@
    si->strtab_size = si->strtab_used = 0;
 
    /* Kludge ... */
-   si->offset 
-      = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
+   si->offset = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      vg_read_lib_symbols ( si );
-      canonicaliseSymtab ( si );
-      canonicaliseLoctab ( si );
-   }
+   vg_read_lib_symbols ( si );
+   canonicaliseSymtab ( si );
+   canonicaliseLoctab ( si );
 }
 
 
@@ -1724,57 +1737,29 @@
    libraries as they are dlopen'd.  Conversely, when the client does
    munmap(), vg_symtab_notify_munmap() throws away any symbol tables
    which happen to correspond to the munmap()d area.  */
-void VG_(read_symbols) ( void )
+void VG_(maybe_read_symbols) ( void )
 {
-   VG_(read_procselfmaps) ( read_symtab_callback );
+   if (!VG_(using_debug_info))
+      return;
 
-   /* Do a sanity check on the symbol tables: ensure that the address
-      space pieces they cover do not overlap (otherwise we are severely
-      hosed).  This is a quadratic algorithm, but there shouldn't be
-      many of them.  
-   */
-   { SegInfo *si, *si2;
-     for (si = segInfo; si != NULL; si = si->next) {
-        /* Check no overlap between *si and those in the rest of the
-           list. */
-        for (si2 = si->next; si2 != NULL; si2 = si2->next) {
-           Addr lo = si->start;
-           Addr hi = si->start + si->size - 1;
-           Addr lo2 = si2->start;
-           Addr hi2 = si2->start + si2->size - 1;
-           Bool overlap;
-           vg_assert(lo < hi);
-	   vg_assert(lo2 < hi2);
-           /* the main assertion */
-           overlap = (lo <= lo2 && lo2 <= hi)
-                      || (lo <= hi2 && hi2 <= hi);
-	   if (overlap) {
-              VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
-              ppSegInfo ( si );
-              ppSegInfo ( si2 );
-              VG_(printf)("\n\n"); 
-              vg_assert(! overlap);
-	   }
-        }
-     }
-   }    
+   VGP_PUSHCC(VgpReadSyms);
+      VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
+   VGP_POPCC(VgpReadSyms);
 }
 
-
 /* When an munmap() call happens, check to see whether it corresponds
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
    maintain the no-overlapping invariant.
-
-   16 May 02: Returns a Bool indicating whether or not the discarded
-   range falls inside a known executable segment.  See comment at top
-   of file for why.
 */
-Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
+void VG_(maybe_unload_symbols) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
+   if (!VG_(using_debug_info))
+      return;
+
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1784,7 +1769,7 @@
       curr = curr->next;
    }
    if (curr == NULL) 
-      return False;
+      return;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1799,7 +1784,7 @@
    }
 
    freeSegInfo(curr);
-   return True;
+   return;
 }
 
 
@@ -1808,13 +1793,22 @@
 /*--- plausible-looking stack dumps.                       ---*/
 /*------------------------------------------------------------*/
 
+static __inline__ void ensure_debug_info_inited ( void )
+{
+   if (!VG_(using_debug_info)) {
+      VG_(using_debug_info) = True;
+      VG_(maybe_read_symbols)();
+   }
+}
+
 /* Find a symbol-table index containing the specified pointer, or -1
    if not found.  Binary search.  */
 
-static Int search_one_symtab ( SegInfo* si, Addr ptr )
+static Int search_one_symtab ( SegInfo* si, Addr ptr,
+                               Bool match_anywhere_in_fun )
 {
    Addr a_mid_lo, a_mid_hi;
-   Int  mid, 
+   Int  mid, size, 
         lo = 0, 
         hi = si->symtab_used-1;
    while (True) {
@@ -1822,7 +1816,10 @@
       if (lo > hi) return -1; /* not found */
       mid      = (lo + hi) / 2;
       a_mid_lo = si->symtab[mid].addr;
-      a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
+      size = ( match_anywhere_in_fun
+             ? si->symtab[mid].size
+             : 1);
+      a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
 
       if (ptr < a_mid_lo) { hi = mid-1; continue; } 
       if (ptr > a_mid_hi) { lo = mid+1; continue; }
@@ -1836,21 +1833,29 @@
    *psi to the relevant SegInfo, and *symno to the symtab entry number
    within that.  If not found, *psi is set to NULL.  */
 
-static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
+static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi, 
+                                           /*OUT*/Int* symno,
+                                 Bool match_anywhere_in_fun )
 {
    Int      sno;
    SegInfo* si;
+
+   ensure_debug_info_inited();
+   VGP_PUSHCC(VgpSearchSyms);
+   
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
-         sno = search_one_symtab ( si, ptr );
+         sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
          if (sno == -1) goto not_found;
          *symno = sno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
@@ -1882,54 +1887,84 @@
    *psi to the relevant SegInfo, and *locno to the loctab entry number
    within that.  If not found, *psi is set to NULL.
 */
-static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
+static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
+                                           /*OUT*/Int* locno )
 {
    Int      lno;
    SegInfo* si;
+
+   VGP_PUSHCC(VgpSearchSyms);
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
          lno = search_one_loctab ( si, ptr );
          if (lno == -1) goto not_found;
          *locno = lno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
 /* The whole point of this whole big deal: map a code address to a
    plausible symbol name.  Returns False if no idea; otherwise True.
-   Caller supplies buf and nbuf.  If no_demangle is True, don't do
+   Caller supplies buf and nbuf.  If demangle is False, don't do
    demangling, regardless of vg_clo_demangle -- probably because the
    call has come from vg_what_fn_or_object_is_this. */
-Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a, 
-                            Char* buf, Int nbuf )
+static
+Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
+                  Bool match_anywhere_in_fun )
 {
    SegInfo* si;
    Int      sno;
-   search_all_symtabs ( a, &si, &sno );
+   search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
    if (si == NULL) 
       return False;
-   if (no_demangle) {
+   if (demangle) {
+      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
+   } else {
       VG_(strncpy_safely) 
          ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
-   } else {
-      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
    }
    return True;
 }
 
+/* This is available to skins... always demangle C++ names */
+Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
 
-/* Map a code address to the name of a shared object file.  Returns
-   False if no idea; otherwise False.  Caller supplies buf and
-   nbuf. */
-static
-Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
+/* This is available to skins... always demangle C++ names,
+   only succeed if 'a' matches first instruction of function. */
+Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/False );
+}
+
+/* This is only available to core... don't demangle C++ names */
+Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/False, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
+
+/* Map a code address to the name of a shared object file or the executable.
+   Returns False if no idea; otherwise True.  Doesn't require debug info.
+   Caller supplies buf and nbuf. */
+Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
 {
    SegInfo* si;
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= a && a < si->start+si->size) {
          VG_(strncpy_safely)(buf, si->filename, nbuf);
@@ -1939,27 +1974,39 @@
    return False;
 }
 
-/* Return the name of an erring fn in a way which is useful
-   for comparing against the contents of a suppressions file. 
-   Always writes something to buf.  Also, doesn't demangle the
-   name, because we want to refer to mangled names in the 
-   suppressions file.
-*/
-void VG_(what_obj_and_fun_is_this) ( Addr a,
-                                     Char* obj_buf, Int n_obj_buf,
-                                     Char* fun_buf, Int n_fun_buf )
+
+/* Map a code address to a filename.  Returns True if successful.  */
+Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
 {
-   (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
-   (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], 
+                       n_filename);
+   return True;
 }
 
+/* Map a code address to a line number.  Returns True if successful. */
+Bool VG_(get_linenum)( Addr a, UInt* lineno )
+{
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   *lineno = si->loctab[locno].lineno;
+
+   return True;
+}
 
 /* Map a code address to a (filename, line number) pair.  
    Returns True if successful.
 */
-Bool VG_(what_line_is_this)( Addr a, 
-                             UChar* filename, Int n_filename, 
-                             UInt* lineno )
+Bool VG_(get_filename_linenum)( Addr a, 
+                                Char* filename, Int n_filename, 
+                                UInt* lineno )
 {
    SegInfo* si;
    Int      locno;
@@ -2001,11 +2048,13 @@
 
    n = 0;
 
-   know_fnname  = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
-   know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
-   know_srcloc  = VG_(what_line_is_this)(ec->eips[0], 
-                                         buf_srcloc, M_VG_ERRTXT, 
-                                         &lineno);
+   // SSS: factor this repeated code out!
+
+   know_fnname  = VG_(get_fnname) (ec->eips[0], buf_fn,  M_VG_ERRTXT);
+   know_objname = VG_(get_objname)(ec->eips[0], buf_obj, M_VG_ERRTXT);
+   know_srcloc  = VG_(get_filename_linenum)(ec->eips[0], 
+                                            buf_srcloc, M_VG_ERRTXT, 
+                                            &lineno);
 
    APPEND("   at ");
    VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
@@ -2035,11 +2084,11 @@
    VG_(message)(Vg_UserMsg, "%s", buf);
 
    for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
-      know_fnname  = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
-      know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
-      know_srcloc  = VG_(what_line_is_this)(ec->eips[i], 
-                                          buf_srcloc, M_VG_ERRTXT, 
-                                          &lineno);
+      know_fnname  = VG_(get_fnname) (ec->eips[i], buf_fn,  M_VG_ERRTXT);
+      know_objname = VG_(get_objname)(ec->eips[i], buf_obj, M_VG_ERRTXT);
+      know_srcloc  = VG_(get_filename_linenum)(ec->eips[i], 
+                                               buf_srcloc, M_VG_ERRTXT, 
+                                               &lineno);
       n = 0;
       APPEND("   by ");
       VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S
index adabbed..52d6091 100644
--- a/coregrind/vg_syscall.S
+++ b/coregrind/vg_syscall.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
new file mode 100644
index 0000000..a500deb
--- /dev/null
+++ b/coregrind/vg_syscalls.c
@@ -0,0 +1,3164 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Update the byte permission maps following a system call.     ---*/
+/*---                                                vg_syscalls.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+/* vg_unsafe.h should NOT be included into any file except this
+   one. */
+#include "vg_unsafe.h"
+
+
+/* All system calls are channelled through here, doing two things:
+
+   * notify the skin of the memory events (reads, writes) happening
+
+   * perform the syscall, usually by passing it along to the kernel
+     unmodified.  However, because we simulate signals ourselves,
+     signal-related syscalls are routed to vg_signal.c, and are not
+     delivered to the kernel.
+
+   A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S
+   does the tricky bit of passing a syscall to the kernel, whilst
+   having the simulator retain control.
+*/
+
+#define SYSCALL_TRACK(fn, args...)  VG_TRACK(fn, Vg_CoreSysCall, ## args)
+
+#define MAYBE_PRINTF(format, args...)  \
+   if (VG_(clo_trace_syscalls))        \
+      VG_(printf)(format, ## args)
+
+/* ---------------------------------------------------------------------
+   Doing mmap, munmap, mremap, mprotect
+   ------------------------------------------------------------------ */
+
+// Nb: this isn't done as precisely as possible, but it seems that programs
+// are usually sufficiently well-behaved that the more obscure corner cases
+// aren't important.  Various comments in the few functions below give more
+// details... njn 2002-Sep-17
+
+/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
+   munmap, mprotect (and mremap??) work at the page level.  So addresses
+   and lengths must be adjusted for this. */
+
+/* Mash around start and length so that the area exactly covers
+   an integral number of pages.  If we don't do that, memcheck's
+   idea of addressable memory diverges from that of the
+   kernel's, which causes the leak detector to crash. */
+static 
+void mash_addr_and_len( Addr* a, UInt* len)
+{
+   while (( *a         % VKI_BYTES_PER_PAGE) > 0) { (*a)--; (*len)++; }
+   while (((*a + *len) % VKI_BYTES_PER_PAGE) > 0) {         (*len)++; }
+}
+
+static
+void mmap_segment ( Addr a, UInt len, UInt prot, Int fd )
+{
+   Bool nn, rr, ww, xx;
+
+   /* Records segment, reads debug symbols if necessary */
+   if (prot & PROT_EXEC && fd != -1)
+      VG_(new_exe_segment) ( a, len );
+
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   VG_TRACK( new_mem_mmap, a, len, nn, rr, ww, xx );
+}
+
+static
+void munmap_segment ( Addr a, UInt len )
+{
+   /* Addr orig_a   = a;
+      Addr orig_len = len; */
+
+   mash_addr_and_len(&a, &len);
+   /*
+   VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", 
+      orig_a, orig_len, a, len, (orig_a!=start || orig_len!=length) 
+                                    ? "CHANGE" : "");
+   */
+
+   /* Invalidate translations as necessary (also discarding any basic
+      block-specific info retained by the skin) and unload any debug
+      symbols. */
+   // This doesn't handle partial unmapping of exe segs correctly, if that
+   // ever happens...
+   VG_(remove_if_exe_segment) ( a, len );
+
+   VG_TRACK( die_mem_munmap, a, len );
+}
+
+static 
+void mprotect_segment ( Addr a, UInt len, Int prot )
+{
+   Bool nn, rr, ww, xx;
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   // if removing exe permission, should check and remove from exe_seg list
+   // if adding, should check and add to exe_seg list
+   // easier to ignore both cases -- both v. unlikely?
+   mash_addr_and_len(&a, &len);
+   VG_TRACK( change_mem_mprotect, a, len, nn, rr, ww, xx );
+}
+
+static 
+void mremap_segment ( old_addr, old_size, new_addr, new_size )
+{
+   /* If the block moves, assume new and old blocks can't overlap; seems to
+    * be valid judging from Linux kernel code in mm/mremap.c */
+   vg_assert(old_addr == new_addr         ||
+             old_addr+old_size < new_addr ||
+             new_addr+new_size < old_addr);
+
+   if (new_size < old_size) {
+      // if exe_seg
+      //    unmap old symbols from old_addr+new_size..old_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, new_size );
+      VG_TRACK( die_mem_munmap, old_addr+new_size, old_size-new_size );
+
+   } else {
+      // if exe_seg
+      //    map new symbols from new_addr+old_size..new_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, old_size );
+      // what should the permissions on the new extended part be??
+      // using 'rwx'
+      VG_TRACK( new_mem_mmap,   new_addr+old_size, new_size-old_size,
+                                False, True, True, True );
+   }
+}
+
+
+/* Is this a Linux kernel error return value? */
+/* From:
+   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
+   linux/i386/sysdep.h?
+   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
+
+   \begin{quote}:
+
+   Linux uses a negative return value to indicate syscall errors,
+   unlike most Unices, which use the condition codes' carry flag.
+
+   Since version 2.1 the return value of a system call might be
+   negative even if the call succeeded.  E.g., the `lseek' system call
+   might return a large offset.  Therefore we must not anymore test
+   for < 0, but test for a real error by making sure the value in %eax
+   is a real error number.  Linus said he will make sure that no syscall
+   returns a value in -1 .. -4095 as a valid result so we can safely
+   test with -4095.  
+
+   END QUOTE
+*/
+Bool VG_(is_kerror) ( Int res )
+{
+   if (res >= -4095 && res <= -1)
+      return True;
+   else
+      return False;
+}
+
+static
+UInt get_shm_size ( Int shmid )
+{
+   struct shmid_ds buf;
+   long __res;
+    __asm__ volatile ( "int $0x80"
+                       : "=a" (__res)
+                       : "0" (__NR_ipc),
+                         "b" ((long)(24) /*IPCOP_shmctl*/),
+                         "c" ((long)(shmid)),
+                         "d" ((long)(IPC_STAT)),
+                         "S" ((long)(0)),
+                         "D" ((long)(&buf)) );
+    if ( VG_(is_kerror) ( __res ) )
+       return 0;
+ 
+   return buf.shm_segsz;
+}
+ 
+static
+Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
+{
+   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
+   Char *result = VG_(arena_malloc) ( aid, len );
+   VG_(strcpy) ( result, s1 );
+   VG_(strcat) ( result, s2 );
+   return result;
+}
+
+static 
+void pre_mem_read_sendmsg ( ThreadState* tst, 
+                            Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, base, size );
+
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static 
+void pre_mem_write_recvmsg ( ThreadState* tst, 
+                             Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_write, tst, outmsg, base, size );
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static
+void post_mem_write_recvmsg ( ThreadState* tst,
+                              Char *fieldName, UInt base, UInt size )
+{
+   VG_TRACK( post_mem_write, base, size );
+}
+ 
+static
+void msghdr_foreachfield ( 
+        ThreadState* tst, 
+        struct msghdr *msg, 
+        void (*foreach_func)( ThreadState*, Char *, UInt, UInt ) 
+     )
+{
+   if ( !msg )
+      return;
+
+   foreach_func ( tst, "(msg)", (Addr)msg, sizeof( struct msghdr ) );
+
+   if ( msg->msg_name )
+      foreach_func ( tst, 
+                     "(msg.msg_name)", 
+                     (Addr)msg->msg_name, msg->msg_namelen );
+
+   if ( msg->msg_iov ) {
+      struct iovec *iov = msg->msg_iov;
+      UInt i;
+
+      foreach_func ( tst, 
+                     "(msg.msg_iov)", 
+                     (Addr)iov, msg->msg_iovlen * sizeof( struct iovec ) );
+
+      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
+         foreach_func ( tst, 
+                        "(msg.msg_iov[i]", 
+                        (Addr)iov->iov_base, iov->iov_len );
+   }
+
+   if ( msg->msg_control )
+      foreach_func ( tst, 
+                     "(msg.msg_control)", 
+                     (Addr)msg->msg_control, msg->msg_controllen );
+}
+
+static
+void pre_mem_read_sockaddr ( ThreadState* tst,
+                                 Char *description,
+                                 struct sockaddr *sa, UInt salen )
+{
+   Char *outmsg = VG_(arena_malloc) ( VG_AR_TRANSIENT, 
+                                      strlen( description ) + 30 );
+
+   VG_(sprintf) ( outmsg, description, ".sa_family" );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) &sa->sa_family, sizeof (sa_family_t));
+               
+   switch (sa->sa_family) {
+                  
+      case AF_UNIX:
+         VG_(sprintf) ( outmsg, description, ".sun_path" );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, outmsg,
+            (UInt) ((struct sockaddr_un *) sa)->sun_path);
+         break;
+                     
+      case AF_INET:
+         VG_(sprintf) ( outmsg, description, ".sin_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_port,
+            sizeof (((struct sockaddr_in *) sa)->sin_port));
+         VG_(sprintf) ( outmsg, description, ".sin_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_addr,
+            sizeof (struct in_addr));
+         break;
+                           
+      case AF_INET6:
+         VG_(sprintf) ( outmsg, description, ".sin6_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_port,
+            sizeof (((struct sockaddr_in6 *) sa)->sin6_port));
+         VG_(sprintf) ( outmsg, description, ".sin6_flowinfo" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_flowinfo,
+            sizeof (uint32_t));
+         VG_(sprintf) ( outmsg, description, ".sin6_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_addr,
+            sizeof (struct in6_addr));
+#        ifndef GLIBC_2_1
+         VG_(sprintf) ( outmsg, description, ".sin6_scope_id" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_scope_id,
+            sizeof (uint32_t));
+#        endif
+         break;
+               
+      default:
+         VG_(sprintf) ( outmsg, description, "" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) sa, salen );
+         break;
+   }
+   
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+/* Dereference a pointer to a UInt. */
+static UInt deref_UInt ( ThreadState* tst, Addr a, Char* s )
+{
+   UInt* a_p = (UInt*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(UInt) );
+   if (a_p == NULL)
+      return 0;
+   else
+      return *a_p;
+}
+
+/* Dereference a pointer to a pointer. */
+static Addr deref_Addr ( ThreadState* tst, Addr a, Char* s )
+{
+   Addr* a_p = (Addr*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(Addr) );
+   return *a_p;
+}
+
+static 
+void buf_and_len_pre_check( ThreadState* tst, Addr buf_p, Addr buflen_p,
+                            Char* buf_s, Char* buflen_s )
+{
+   if (VG_(track_events).pre_mem_write) {
+      UInt buflen_in = deref_UInt( tst, buflen_p, buflen_s);
+      if (buflen_in > 0) {
+         VG_(track_events).pre_mem_write ( Vg_CoreSysCall,
+                                           tst, buf_s, buf_p, buflen_in );
+      }
+   }
+}
+
+static 
+void buf_and_len_post_check( ThreadState* tst, Int res,
+                             Addr buf_p, Addr buflen_p, Char* s )
+{
+   if (!VG_(is_kerror)(res) && VG_(track_events).post_mem_write) {
+      UInt buflen_out = deref_UInt( tst, buflen_p, s);
+      if (buflen_out > 0 && buf_p != (Addr)NULL) {
+         VG_(track_events).post_mem_write ( buf_p, buflen_out );
+      }
+   }
+}
+
+/* ---------------------------------------------------------------------
+   Data seg end, for brk()
+   ------------------------------------------------------------------ */
+
+/* Records the current end of the data segment so we can make sense of
+   calls to brk(). */
+Addr curr_dataseg_end;
+
+void VG_(init_dataseg_end_for_brk) ( void )
+{
+   curr_dataseg_end = (Addr)VG_(brk)(0);
+   if (curr_dataseg_end == (Addr)(-1))
+      VG_(panic)("can't determine data-seg end for brk()");
+   if (0)
+      VG_(printf)("DS END is %p\n", (void*)curr_dataseg_end);
+}
+
+/* ---------------------------------------------------------------------
+   The Main Entertainment ...
+   ------------------------------------------------------------------ */
+
+void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
+{
+   ThreadState* tst;
+   UInt         syscallno, arg1, arg2, arg3, arg4, arg5;
+   /* Do not make this unsigned! */
+   Int res;
+   void* pre_res = 0;   /* shut gcc up */
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   syscallno        = tst->m_eax;
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+
+   /* Do any pre-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   /* the syscall no is in %eax.  For syscalls with <= 5 args,
+      args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi.
+      For calls with > 5 args, %ebx points to a lump of memory
+      containing the args.
+
+      The result is returned in %eax.  If this value >= 0, the call
+      succeeded, and this is the return value.  If < 0, it failed, and
+      the negation of this value is errno.  To be more specific, 
+      if res is in the range -EMEDIUMTYPE (-124) .. -EPERM (-1)
+      (kernel 2.4.9 sources, include/asm-i386/errno.h)
+      then it indicates an error.  Otherwise it doesn't.
+
+      Dirk Mueller (mueller@kde.org) says that values -4095 .. -1
+      (inclusive?) indicate error returns.  Not sure where the -4095
+      comes from.
+   */
+
+   MAYBE_PRINTF("SYSCALL[%d,%d](%3d): ", 
+                  VG_(getpid)(), tid, syscallno);
+
+   switch (syscallno) {
+
+      case __NR_exit:
+         VG_(panic)("syscall exit() not caught by the scheduler?!");
+         break;
+
+      case __NR_clone:
+         VG_(unimplemented)
+            ("clone(): not supported by Valgrind.\n   "
+             "We do now support programs linked against\n   "
+             "libpthread.so, though.  Re-run with -v and ensure that\n   "
+             "you are picking up Valgrind's implementation of libpthread.so.");
+         break;
+
+#     if defined(__NR_modify_ldt)
+      case __NR_modify_ldt:
+         VG_(nvidia_moan)();
+         VG_(unimplemented)
+            ("modify_ldt(): I (JRS) haven't investigated this yet; sorry.");
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
+
+#     if defined(__NR_vhangup)
+      case __NR_vhangup: /* syscall 111 */
+         /* int vhangup(void); */
+         MAYBE_PRINTF("vhangup()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_iopl)
+      case __NR_iopl: /* syscall 110 */
+         /* int iopl(int level); */
+         MAYBE_PRINTF("iopl ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_getxattr)
+      case __NR_getxattr: /* syscall 229 */
+         /* ssize_t getxattr (const char *path, const char* name,
+                              void* value, size_t size); */
+         MAYBE_PRINTF("getxattr ( %p, %p, %p, %d )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(name)", arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "getxattr(value)", arg3, arg4 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0 
+                                  && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write, arg3, res );
+         }
+         break;
+#     endif
+      
+#     if defined(__NR_quotactl)
+      case __NR_quotactl: /* syscall 131 */
+         /* int quotactl(int cmd, char *special, int uid, caddr_t addr); */
+         MAYBE_PRINTF("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "quotactl(special)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_truncate64)
+      case __NR_truncate64: /* syscall 193 */
+         /* int truncate64(const char *path, off64_t length); */
+         MAYBE_PRINTF("truncate64 ( %p, %lld )\n",
+                        arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate64(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_fdatasync)
+      case __NR_fdatasync: /* syscall 148 */
+         /* int fdatasync(int fd); */
+         MAYBE_PRINTF("fdatasync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_msync) /* syscall 144 */
+      case __NR_msync:
+         /* int msync(const void *start, size_t length, int flags); */
+         MAYBE_PRINTF("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "msync(start)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);  
+         break;
+#     endif
+
+#     if defined(__NR_getpmsg) /* syscall 188 */
+      case __NR_getpmsg: 
+      {
+      /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int *bandp, int *flagsp); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("getpmsg ( %d, %p, %p, %p, %p )\n",
+                      arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(ctrl)", 
+                                (UInt)ctrl->buf, ctrl->maxlen);
+      if (data && data->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(data)", 
+                                 (UInt)data->buf, data->maxlen);
+      if (arg4)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(bandp)", 
+                                (UInt)arg4, sizeof(int));
+      if (arg5)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(flagsp)", 
+                                (UInt)arg5, sizeof(int));
+      KERNEL_DO_SYSCALL(tid,res);
+      if (!VG_(is_kerror)(res) && res == 0 && ctrl && ctrl->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)ctrl->buf, ctrl->len);
+      }
+      if (!VG_(is_kerror)(res) && res == 0 && data && data->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)data->buf, data->len);
+      }
+      }
+      break;
+#     endif
+
+
+#     if defined(__NR_putpmsg) /* syscall 189 */
+      case __NR_putpmsg: 
+      {
+      /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int band, int flags); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("putpmsg ( %d, %p, %p, %d, %d )\n",
+                     arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(ctrl)",
+                                (UInt)ctrl->buf, ctrl->len);
+      if (data && data->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(data)",
+                                (UInt)data->buf, data->len);
+      KERNEL_DO_SYSCALL(tid,res);
+      }
+      break;
+#     endif
+
+      case __NR_getitimer: /* syscall 105 */
+         /* int getitimer(int which, struct itimerval *value); */
+         MAYBE_PRINTF("getitimer ( %d, %p )\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getitimer(timer)", arg2, 
+                           sizeof(struct itimerval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg2, sizeof(struct itimerval));
+         }
+         break;
+
+#     if defined(__NR_syslog)
+      case __NR_syslog: /* syscall 103 */
+         /* int syslog(int type, char *bufp, int len); */
+         MAYBE_PRINTF("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
+         switch(arg1) {
+            case 2: case 3: case 4:
+               SYSCALL_TRACK( pre_mem_write, tst, "syslog(buf)", arg2, arg3);
+	       break;
+            default: 
+               break;
+         }
+         KERNEL_DO_SYSCALL(tid, res);
+         if (!VG_(is_kerror)(res)) {
+            switch (arg1) {
+               case 2: case 3: case 4:
+                  VG_TRACK( post_mem_write, arg2, arg3 );
+                  break;
+               default:
+                  break;
+            }
+         }
+         break;
+#     endif
+
+      case __NR_personality: /* syscall 136 */
+         /* int personality(unsigned long persona); */
+         MAYBE_PRINTF("personality ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chroot: /* syscall 61 */
+         /* int chroot(const char *path); */
+         MAYBE_PRINTF("chroot ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chroot(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_madvise)
+      case __NR_madvise: /* syscall 219 */
+         /* int madvise(void *start, size_t length, int advice ); */
+         MAYBE_PRINTF("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mremap)
+      /* Treating it like an munmap() followed by a mmap() */
+      case __NR_mremap: /* syscall 163 */
+         /* void* mremap(void * old_address, size_t old_size, 
+                         size_t new_size, unsigned long flags); */
+         MAYBE_PRINTF("mremap ( %p, %d, %d, 0x%x )\n", 
+                        arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "mremap(old_address)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mremap_segment( arg1, arg2, (Addr)res, arg3 );
+         }
+         break;         
+#     endif
+
+      case __NR_nice: /* syscall 34 */
+         /* int nice(int inc); */
+         MAYBE_PRINTF("nice ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! */
+
+#     if defined(__NR_setresgid32)
+      case __NR_setresgid32: /* syscall 210 */
+         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
+         MAYBE_PRINTF("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid32)
+      case __NR_setfsuid32: /* syscall 215 */
+         /* int setfsuid(uid_t fsuid); */
+          MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+          KERNEL_DO_SYSCALL(tid,res);
+          break;
+#     endif
+
+#     if defined(__NR__sysctl)
+      case __NR__sysctl:
+      /* int _sysctl(struct __sysctl_args *args); */
+         MAYBE_PRINTF("_sysctl ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "_sysctl(args)", arg1, 
+                            sizeof(struct __sysctl_args) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct __sysctl_args) );
+         break;
+#     endif
+
+#     if defined(__NR_sched_getscheduler)
+      case __NR_sched_getscheduler:
+         /* int sched_getscheduler(pid_t pid); */
+         MAYBE_PRINTF("sched_getscheduler ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_sched_setscheduler)
+      case __NR_sched_setscheduler:
+         /* int sched_setscheduler(pid_t pid, int policy, 
+                const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst,
+                              "sched_setscheduler(struct sched_param *p)", 
+                              arg3, sizeof(struct sched_param));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlock)
+      case __NR_mlock:
+         /* int mlock(const void * addr, size_t len) */
+         MAYBE_PRINTF("mlock ( %p, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlockall)
+      case __NR_mlockall:
+         /* int mlockall(int flags); */
+         MAYBE_PRINTF("mlockall ( %x )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_munlockall)
+      case __NR_munlockall:
+         /* int munlockall(void); */
+         MAYBE_PRINTF("munlockall ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_max)
+      case __NR_sched_get_priority_max:
+         /* int sched_get_priority_max(int policy); */
+         MAYBE_PRINTF("sched_get_priority_max ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_min)
+      case __NR_sched_get_priority_min: /* syscall 160 */
+         /* int sched_get_priority_min(int policy); */
+         MAYBE_PRINTF("sched_get_priority_min ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_setpriority)
+      case __NR_setpriority: /* syscall 97 */
+         /* int setpriority(int which, int who, int prio); */
+         MAYBE_PRINTF("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_getpriority)
+      case __NR_getpriority: /* syscall 96 */
+         /* int getpriority(int which, int who); */
+         MAYBE_PRINTF("getpriority ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsgid)
+      case __NR_setfsgid: /* syscall 139 */
+         /* int setfsgid(gid_t gid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setregid)
+      case __NR_setregid: /* syscall 71 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid)
+      case __NR_setresuid: /* syscall 164 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid)
+      case __NR_setfsuid: /* syscall 138 */
+         /* int setfsuid(uid_t uid); */
+         MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_sendfile)
+      case __NR_sendfile: /* syscall 187 */
+         /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
+                             size_t count) */
+         MAYBE_PRINTF("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sendfile(offset)", arg3, sizeof(off_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg3, sizeof( off_t ) );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_pwrite)
+      case __NR_pwrite: /* syscall 181 */
+         /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
+                            off_t offset); */
+         MAYBE_PRINTF("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read, tst, "pwrite(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! */
+
+      case __NR_sync: /* syscall 36 */
+         /* int sync(); */
+         MAYBE_PRINTF("sync ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+ 
+      case __NR_fstatfs: /* syscall 100 */
+         /* int fstatfs(int fd, struct statfs *buf); */
+         MAYBE_PRINTF("fstatfs ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */
+
+      case __NR_pause: /* syscall 29 */
+         /* int pause(void); */
+         MAYBE_PRINTF("pause ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getsid: /* syscall 147 */
+         /* pid_t getsid(pid_t pid); */
+         MAYBE_PRINTF("getsid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_pread)
+      case __NR_pread: /* syscall 180 */
+         /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */
+         MAYBE_PRINTF("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "pread(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
+                        VG_(getpid)(),
+                        arg1, arg2, arg3, arg4, res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! */
+
+      case __NR_mknod: /* syscall 14 */
+         /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
+         MAYBE_PRINTF("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mknod(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_flock: /* syscall 143 */
+         /* int flock(int fd, int operation); */
+         MAYBE_PRINTF("flock ( %d, %d )\n", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_rt_sigsuspend)
+      /* Viewed with great suspicion by me, but, hey, let's do it
+         anyway ... */
+      case __NR_rt_sigsuspend: /* syscall 179 */
+         /* int sigsuspend(const sigset_t *mask); */
+         MAYBE_PRINTF("sigsuspend ( %p )\n", arg1 );
+         if (arg1 != (Addr)NULL) {
+            /* above NULL test is paranoia */
+            SYSCALL_TRACK( pre_mem_read, tst, "sigsuspend(mask)", arg1, 
+                              sizeof(vki_ksigset_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_init_module: /* syscall 128 */
+         /* int init_module(const char *name, struct module *image); */
+         MAYBE_PRINTF("init_module ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "init_module(name)", arg1 );
+         SYSCALL_TRACK( pre_mem_read, tst, "init_module(image)", arg2, 
+                           VKI_SIZEOF_STRUCT_MODULE );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ioperm: /* syscall 101 */
+         /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
+         MAYBE_PRINTF("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_capget: /* syscall 184 */
+         /* int capget(cap_user_header_t header, cap_user_data_t data); */
+         MAYBE_PRINTF("capget ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "capget(header)", arg1, 
+                                             sizeof(vki_cap_user_header_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "capget(data)", arg2, 
+                                           sizeof( vki_cap_user_data_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof( vki_cap_user_data_t) );
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_execve:
+         /* int execve (const char *filename, 
+                        char *const argv [], 
+                        char *const envp[]); */
+         MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
+                        arg1, arg1, arg2, arg3);
+         /* Resistance is futile.  Nuke all other threads.  POSIX
+            mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         /* Make any binding for LD_PRELOAD disappear, so that child
+            processes don't get traced into. */
+         if (!VG_(clo_trace_children)) {
+            Int i;
+            Char** envp = (Char**)arg3;
+            Char*  ld_preload_str = NULL;
+            Char*  ld_library_path_str = NULL;
+            for (i = 0; envp[i] != NULL; i++) {
+               if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
+                  ld_preload_str = &envp[i][11];
+               if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
+                  ld_library_path_str = &envp[i][16];
+            }
+            VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
+	       ld_preload_str, ld_library_path_str );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Should we still be alive here?  Don't think so. */
+         /* Actually, above comment is wrong.  execve can fail, just
+            like any other syscall -- typically the file to exec does
+            not exist.  Hence: */
+         vg_assert(VG_(is_kerror)(res));
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!!     end     !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_access: /* syscall 33 */
+         /* int access(const char *pathname, int mode); */
+         MAYBE_PRINTF("access ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "access(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_alarm: /* syscall 27 */
+         /* unsigned int alarm(unsigned int seconds); */
+         MAYBE_PRINTF("alarm ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_brk: /* syscall 45 */
+         /* Haven't a clue if this is really right. */
+         /* int brk(void *end_data_segment); */
+         MAYBE_PRINTF("brk ( %p ) --> ",arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("0x%x\n", res);
+
+         if (!VG_(is_kerror)(res)) {
+            if (arg1 == 0) {
+               /* Just asking where the current end is. (???) */
+               curr_dataseg_end = res;
+            } else
+            if (arg1 < curr_dataseg_end) {
+               /* shrinking the data segment. */
+               VG_TRACK( die_mem_brk, (Addr)arg1, 
+                                      curr_dataseg_end-arg1 );
+               curr_dataseg_end = arg1;
+            } else
+            if (arg1 > curr_dataseg_end && res != 0) {
+               /* asked for more memory, and got it */
+               /* 
+               VG_(printf)("BRK: new area %x .. %x\n", 
+                           VG_(curr_dataseg_end, arg1-1 );
+               */
+               VG_TRACK( new_mem_brk, (Addr)curr_dataseg_end, 
+                                         arg1-curr_dataseg_end );
+               curr_dataseg_end = arg1;         
+            }
+         }
+         break;
+
+      case __NR_chdir: /* syscall 12 */
+         /* int chdir(const char *path); */
+         MAYBE_PRINTF("chdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chdir(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chmod: /* syscall 15 */
+         /* int chmod(const char *path, mode_t mode); */
+         MAYBE_PRINTF("chmod ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chmod(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_chown32)
+      case __NR_chown32: /* syscall 212 */
+#     endif
+#     if defined(__NR_lchown32)
+      case __NR_lchown32: /* syscall 198 */
+#     endif
+      case __NR_chown: /* syscall 16 */
+         /* int chown(const char *path, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chown(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_close: /* syscall 6 */
+         /* int close(int fd); */
+         MAYBE_PRINTF("close ( %d )\n",arg1);
+         /* Detect and negate attempts by the client to close Valgrind's
+            logfile fd ... */
+         if (arg1 == VG_(clo_logfile_fd)) {
+            VG_(message)(Vg_UserMsg, 
+              "Warning: client attempted to close "
+               "Valgrind's logfile fd (%d).", 
+               VG_(clo_logfile_fd));
+            VG_(message)(Vg_UserMsg, 
+              "   Use --logfile-fd=<number> to select an "
+              "alternative logfile fd." );
+            /* Pretend the close succeeded, regardless.  (0 == success) */
+            res = 0;
+            SET_EAX(tid, res);
+         } else {
+            KERNEL_DO_SYSCALL(tid,res);
+         }
+         break;
+
+      case __NR_dup: /* syscall 41 */
+         /* int dup(int oldfd); */
+         MAYBE_PRINTF("dup ( %d ) --> ", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n", res);
+         break;
+
+      case __NR_dup2: /* syscall 63 */
+         /* int dup2(int oldfd, int newfd); */
+         MAYBE_PRINTF("dup2 ( %d, %d ) ...\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
+                        VG_(getpid)(), 
+                        arg1, arg2, res);
+         break;
+
+      case __NR_fcntl: /* syscall 55 */
+         /* int fcntl(int fd, int cmd, int arg); */
+         MAYBE_PRINTF("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchdir: /* syscall 133 */
+         /* int fchdir(int fd); */
+         MAYBE_PRINTF("fchdir ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fchown32)
+      case __NR_fchown32: /* syscall 207 */
+#     endif
+      case __NR_fchown: /* syscall 95 */
+         /* int fchown(int filedes, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchmod: /* syscall 94 */
+         /* int fchmod(int fildes, mode_t mode); */
+         MAYBE_PRINTF("fchmod ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fcntl64)
+      case __NR_fcntl64: /* syscall 221 */
+         /* I don't know what the prototype for this is supposed to be. */
+         /* ??? int fcntl(int fd, int cmd); */
+         MAYBE_PRINTF("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_fstat: /* syscall 108 */
+         /* int fstat(int filedes, struct stat *buf); */
+         MAYBE_PRINTF("fstat ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_vfork: /* syscall 190 */
+         /* pid_t vfork(void); */
+         MAYBE_PRINTF("vfork ( ) ... becomes ... ");
+         /* KLUDGE: we prefer to do a fork rather than vfork. 
+            vfork gives a SIGSEGV, and the stated semantics looks
+            pretty much impossible for us. */
+         tst->m_eax = __NR_fork;
+         /* fall through ... */
+      case __NR_fork: /* syscall 2 */
+         /* pid_t fork(void); */
+         MAYBE_PRINTF("fork ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         if (res == 0) {
+            /* I am the child.  Nuke all other threads which I might
+               have inherited from my parent.  POSIX mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         }
+         break;
+
+      case __NR_fsync: /* syscall 118 */
+         /* int fsync(int fd); */
+         MAYBE_PRINTF("fsync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ftruncate: /* syscall 93 */
+         /* int ftruncate(int fd, size_t length); */
+         MAYBE_PRINTF("ftruncate ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_ftruncate64)
+      case __NR_ftruncate64: /* syscall 194 */
+         /* int ftruncate64(int fd, off64_t length); */
+         MAYBE_PRINTF("ftruncate64 ( %d, %lld )\n", 
+                        arg1,arg2|((long long) arg3 << 32));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getdents: /* syscall 141 */
+         /* int getdents(unsigned int fd, struct dirent *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+#     if defined(__NR_getdents64)
+      case __NR_getdents64: /* syscall 220 */
+         /* int getdents(unsigned int fd, struct dirent64 *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents64(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+#     endif
+
+#     if defined(__NR_getgroups32)
+      case __NR_getgroups32: /* syscall 205 */
+#     endif
+      case __NR_getgroups: /* syscall 80 */
+         /* int getgroups(int size, gid_t list[]); */
+         MAYBE_PRINTF("getgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "getgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res * sizeof(gid_t) );
+         break;
+
+      case __NR_getcwd: /* syscall 183 */
+         /* char *getcwd(char *buf, size_t size); */
+         MAYBE_PRINTF("getcwd ( %p, %d )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getcwd(buf)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg1, arg2 );
+         /* Not really right -- really we should have the asciiz
+            string starting at arg1 readable, or up to arg2 bytes,
+            whichever finishes first. */
+         break;
+
+      case __NR_geteuid: /* syscall 49 */
+         /* uid_t geteuid(void); */
+         MAYBE_PRINTF("geteuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_geteuid32)
+      case __NR_geteuid32: /* syscall 201 */
+         /* ?? uid_t geteuid32(void); */
+         MAYBE_PRINTF("geteuid32(?) ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getegid: /* syscall 50 */
+         /* gid_t getegid(void); */
+         MAYBE_PRINTF("getegid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getegid32)
+      case __NR_getegid32: /* syscall 202 */
+         /* gid_t getegid32(void); */
+         MAYBE_PRINTF("getegid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getgid: /* syscall 47 */
+         /* gid_t getgid(void); */
+         MAYBE_PRINTF("getgid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getgid32)
+      case __NR_getgid32: /* syscall 200 */
+         /* gid_t getgid32(void); */
+         MAYBE_PRINTF("getgid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getpid: /* syscall 20 */
+         /* pid_t getpid(void); */
+         MAYBE_PRINTF("getpid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgid: /* syscall 132 */
+         /* pid_t getpgid(pid_t pid); */
+         MAYBE_PRINTF("getpgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgrp: /* syscall 65 */
+         /* pid_t getpgrp(void); */
+         MAYBE_PRINTF("getpgrp ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getppid: /* syscall 64 */
+         /* pid_t getppid(void); */
+         MAYBE_PRINTF("getppid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getresgid: /* syscall 171 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresgid32)
+      case __NR_getresgid32: /* syscall 211 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+#     endif
+
+      case __NR_getresuid: /* syscall 165 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresuid32)
+      case __NR_getresuid32: /* syscall 209 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+#     endif
+
+#     if defined(__NR_ugetrlimit)
+      case __NR_ugetrlimit: /* syscall 191 */
+#     endif
+      case __NR_getrlimit: /* syscall 76 */
+         /* int getrlimit (int resource, struct rlimit *rlim); */
+         MAYBE_PRINTF("getrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrlimit(rlim)", arg2, 
+                           sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct rlimit) );
+         break;
+
+      case __NR_getrusage: /* syscall 77 */
+         /* int getrusage (int who, struct rusage *usage); */
+         MAYBE_PRINTF("getrusage ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrusage(usage)", arg2, 
+                           sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write,arg2, sizeof(struct rusage) );
+         break;
+
+      case __NR_gettimeofday: /* syscall 78 */
+         /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
+         MAYBE_PRINTF("gettimeofday ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tv)", arg1, 
+                           sizeof(struct timeval) );
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tz)", arg2, 
+                              sizeof(struct timezone) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct timeval) );
+            if (arg2 != 0)
+               VG_TRACK( post_mem_write, arg2, sizeof(struct timezone) );
+         }
+         break;
+
+      case __NR_getuid: /* syscall 24 */
+         /* uid_t getuid(void); */
+         MAYBE_PRINTF("getuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getuid32)
+      case __NR_getuid32: /* syscall 199 */
+         /* ???uid_t getuid32(void); */
+         MAYBE_PRINTF("getuid32 ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_ipc: /* syscall 117 */
+         /* int ipc ( unsigned int call, int first, int second, 
+                      int third, void *ptr, long fifth); */
+         {
+         UInt arg6 = tst->m_ebp;
+
+         MAYBE_PRINTF("ipc ( %d, %d, %d, %d, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5,arg6);
+         switch (arg1 /* call */) {
+            case 1: /* IPCOP_semop */
+               SYSCALL_TRACK( pre_mem_read, tst, "semop(sops)", arg5, 
+                                  arg3 * sizeof(struct sembuf) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 2: /* IPCOP_semget */
+            case 3: /* IPCOP_semctl */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 11: /* IPCOP_msgsnd */
+               {
+                  struct msgbuf *msgp = (struct msgbuf *)arg5;
+                  Int msgsz = arg3;
+
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+            case 12: /* IPCOP_msgrcv */
+               {
+                  struct msgbuf *msgp;
+                  Int msgsz = arg3;
+ 
+                  msgp = (struct msgbuf *)deref_Addr( tst,
+                            (Addr) (&((struct ipc_kludge *)arg5)->msgp),
+                            "msgrcv(msgp)" );
+
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)(res) && res > 0 ) {
+                     VG_TRACK( post_mem_write, (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                     VG_TRACK( post_mem_write, (UInt)msgp->mtext, res );
+                  }
+                  break;
+               }
+            case 13: /* IPCOP_msgget */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 14: /* IPCOP_msgctl */
+               {
+                  switch (arg3 /* cmd */) {
+                     case IPC_STAT:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid_ds) );
+                        }
+                        break;
+                     case IPC_SET:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    if defined(IPC_64)
+                     case IPC_STAT|IPC_64:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid64_ds) );
+                        }
+                        break;
+#                    endif
+#                    if defined(IPC_64)
+                     case IPC_SET|IPC_64:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    endif
+                     default:
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+                  }
+                  break;
+               }
+            case 21: /* IPCOP_shmat */
+               {
+                  Int shmid = arg2;
+                  /*Int shmflag = arg3;*/
+                  Addr addr;
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( VG_(is_kerror) ( res ) )
+                     break;
+                  
+                  /* force readability. before the syscall it is
+                   * indeed uninitialized, as can be seen in
+                   * glibc/sysdeps/unix/sysv/linux/shmat.c */
+                  VG_TRACK( post_mem_write, arg4, sizeof( ULong ) );
+
+                  addr = deref_Addr ( tst, arg4, "shmat(addr)" );
+                  if ( addr > 0 ) { 
+                     UInt segmentSize = get_shm_size ( shmid );
+                     if ( segmentSize > 0 ) {
+                        /* we don't distinguish whether it's read-only or
+                         * read-write -- it doesn't matter really. */
+                        VG_TRACK( post_mem_write, addr, segmentSize );
+                     }
+                  }
+                  break;
+               }
+            case 22: /* IPCOP_shmdt */
+                  KERNEL_DO_SYSCALL(tid,res);
+                  /* ### FIXME: this should call make_noaccess on the
+                   * area passed to shmdt. But there's no way to
+                   * figure out the size of the shared memory segment
+                   * just from the address...  Maybe we want to keep a
+                   * copy of the exiting mappings inside valgrind? */
+                  break;
+            case 23: /* IPCOP_shmget */
+                KERNEL_DO_SYSCALL(tid,res);
+                break;
+            case 24: /* IPCOP_shmctl */
+	      /* Subject: shmctl: The True Story
+                    Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
+                    From: Reuben Thomas <rrt@mupsych.org>
+                      To: Julian Seward <jseward@acm.org>
+
+                 1. As you suggested, the syscall subop is in arg1.
+
+                 2. There are a couple more twists, so the arg order
+                    is actually:
+
+                 arg1 syscall subop
+                 arg2 file desc
+                 arg3 shm operation code (can have IPC_64 set)
+                 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
+                        is defined?
+                 arg5 pointer to buffer
+
+                 3. With this in mind, I've amended the case as below:
+	      */
+               {
+                  UInt cmd = arg3;
+                  Bool out_arg = False;
+                  if ( arg5 ) {
+#                    if defined(IPC_64)
+                     cmd = cmd & (~IPC_64);
+#                    endif
+                     out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
+                     if ( out_arg )
+                        SYSCALL_TRACK( pre_mem_write, tst, 
+                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                     else
+                        SYSCALL_TRACK( pre_mem_read, tst, 
+                           "shmctl(SHM_XXXX,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                  }
+                  KERNEL_DO_SYSCALL(tid,res);
+                  if ( arg5 && !VG_(is_kerror)(res) && res == 0 && out_arg )
+                          VG_TRACK( post_mem_write, arg5, sizeof(struct shmid_ds) );
+               }
+               break;
+            default:
+               VG_(message)(Vg_DebugMsg,
+                            "FATAL: unhandled syscall(ipc) %d",
+                            arg1 );
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         }
+         break;
+
+      case __NR_ioctl: /* syscall 54 */
+         /* int ioctl(int d, int request, ...)
+            [The  "third"  argument  is traditionally char *argp, 
+             and will be so named for this discussion.]
+         */
+         /*
+         VG_(message)(
+            Vg_DebugMsg, 
+            "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
+            arg2,arg1,arg3);
+         */
+         MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
+         switch (arg2 /* request */) {
+            case TCSETS:
+            case TCSETSW:
+            case TCSETSF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{S,SW,SF})", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               break; 
+            case TCGETS:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETS)", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIOS );
+               break;
+            case TCSETA:
+            case TCSETAW:
+            case TCSETAF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{A,AW,AF})", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TCGETA:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETA)", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIO );
+               break;
+            case TCSBRK:
+            case TCXONC:
+            case TCSBRKP:
+            case TCFLSH:
+               /* These just take an int by value */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGWINSZ:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(struct winsize) );
+               break;
+            case TIOCSWINSZ:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGPGRP:
+               /* Get process group ID for foreground processing group. */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
+               break;
+            case TIOCSPGRP:
+               /* Set foreground process group ID.  TIOCSPGRP *reads* a
+                  pid_t from argp; the kernel writes nothing back, so
+                  (unlike TIOCGPGRP above) there is no post_mem_write.
+                  Also fix the label, which previously said TIOCGPGRP. */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res); 
+               break;
+            case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(TIOCGPTN)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write, arg3, sizeof(int));
+               break;
+            case TIOCSCTTY:
+               /* Just takes an int value.  */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCSPTLCK: /* Lock/unlock Pty */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPTLCK)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONBIO:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIONBIO)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIOASYNC:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIOASYNC)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONREAD:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(FIONREAD)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(int) );
+               break;
+
+            /* If you get compilation problems here, change the #if
+               1 to #if 0 and get rid of <scsi/sg.h> in
+               vg_unsafe.h. */
+#       if 1
+            case SG_SET_COMMAND_Q:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_COMMAND_Q)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#           if defined(SG_IO)
+            case SG_IO:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_IO)", arg3, 
+                                 sizeof(struct sg_io_hdr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_io_hdr));
+               break;
+#           endif /* SG_IO */
+            case SG_GET_SCSI_ID:
+               /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_SCSI_ID)", arg3, 
+                                 sizeof(struct sg_scsi_id) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_scsi_id));
+               break;
+            case SG_SET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_RESERVED_SIZE)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_SET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_GET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_VERSION_NUM:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_GET_VERSION_NUM)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#       endif
+
+            case IIOCGETCPS:
+               /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
+                * when KERNEL was. I never saw a larger value than 64 though */
+#              ifndef ISDN_MAX_CHANNELS
+#              define ISDN_MAX_CHANNELS 64
+#              endif
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCGETCPS)", arg3,
+                                 ISDN_MAX_CHANNELS 
+                                 * 2 * sizeof(unsigned long) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, ISDN_MAX_CHANNELS 
+                                        * 2 * sizeof(unsigned long) );
+               break;
+            case IIOCNETGPN:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(IIOCNETGPN)",
+                                 (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
+                                 sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCNETGPN)", arg3,
+                                 sizeof(isdn_net_ioctl_phone) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(isdn_net_ioctl_phone) );
+               break;
+
+            /* These all use struct ifreq AFAIK */
+            case SIOCGIFINDEX:
+            case SIOCGIFFLAGS:        /* get flags                    */
+            case SIOCGIFHWADDR:       /* Get hardware address         */
+            case SIOCGIFMTU:          /* get MTU size                 */
+            case SIOCGIFADDR:         /* get PA address               */
+            case SIOCGIFNETMASK:      /* get network PA mask          */
+            case SIOCGIFMETRIC:       /* get metric                   */
+            case SIOCGIFMAP:          /* Get device parameters        */
+            case SIOCGIFTXQLEN:       /* Get the tx queue length      */
+            case SIOCGIFDSTADDR:      /* get remote PA address        */
+            case SIOCGIFBRDADDR:      /* get broadcast PA address     */
+            case SIOCGIFNAME:         /* get iface name               */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFINDEX)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifreq));
+               break;
+            case SIOCGIFCONF:         /* get iface list               */
+               /* WAS:
+               SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
+               */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               if ( arg3 ) {
+                  // TODO len must be readable and writable
+                  // buf pointer only needs to be readable
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFCONF).ifc_buf",
+                                   (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0 && arg3 ) {
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  if (ifc->ifc_buf != NULL)
+                     VG_TRACK( post_mem_write, (Addr)(ifc->ifc_buf), 
+                                     (UInt)(ifc->ifc_len) );
+               }
+               break;
+            case SIOCGSTAMP:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGSTAMP)", arg3, 
+                                sizeof(struct timeval));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct timeval));
+               break;
+            case SIOCGRARP:           /* get RARP table entry         */
+            case SIOCGARP:            /* get ARP table entry          */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGARP)", arg3, 
+                                sizeof(struct arpreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct arpreq));
+               break;
+                    
+            case SIOCSIFFLAGS:        /* set flags                    */
+            case SIOCSIFMAP:          /* Set device parameters        */
+            case SIOCSIFTXQLEN:       /* Set the tx queue length      */
+            case SIOCSIFDSTADDR:      /* set remote PA address        */
+            case SIOCSIFBRDADDR:      /* set broadcast PA address     */
+            case SIOCSIFNETMASK:      /* set network PA mask          */
+            case SIOCSIFMETRIC:       /* set metric                   */
+            case SIOCSIFADDR:         /* set PA address               */
+            case SIOCSIFMTU:          /* set MTU size                 */
+            case SIOCSIFHWADDR:       /* set hardware address         */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCSIFFLAGS)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            /* Routing table calls.  */
+            case SIOCADDRT:           /* add routing table entry      */
+            case SIOCDELRT:           /* delete routing table entry   */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCADDRT/DELRT)", arg3, 
+                                sizeof(struct rtentry));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* RARP cache control calls. */
+            case SIOCDRARP:           /* delete RARP table entry      */
+            case SIOCSRARP:           /* set RARP table entry         */
+            /* ARP cache control calls. */
+            case SIOCSARP:            /* set ARP table entry          */
+            case SIOCDARP:            /* delete ARP table entry       */
+               /* These take a struct arpreq, just like the SIOCGARP /
+                  SIOCGRARP cases above -- not a struct ifreq.  The label
+                  previously (and misleadingly) said SIOCSIFFLAGS. */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCS/DARP, SIOCS/DRARP)", arg3, 
+                                sizeof(struct arpreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SIOCSPGRP:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SIOCSPGRP)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* linux/soundcard interface (OSS) */
+            case SNDCTL_SEQ_GETOUTCOUNT:
+            case SNDCTL_SEQ_GETINCOUNT:
+            case SNDCTL_SEQ_PERCMODE:
+            case SNDCTL_SEQ_TESTMIDI:
+            case SNDCTL_SEQ_RESETSAMPLES:
+            case SNDCTL_SEQ_NRSYNTHS:
+            case SNDCTL_SEQ_NRMIDIS:
+            case SNDCTL_SEQ_GETTIME:
+            case SNDCTL_DSP_GETFMTS:
+            case SNDCTL_DSP_GETTRIGGER:
+            case SNDCTL_DSP_GETODELAY:
+#           if defined(SNDCTL_DSP_GETSPDIF)
+            case SNDCTL_DSP_GETSPDIF:
+#           endif
+            case SNDCTL_DSP_GETCAPS:
+            case SOUND_PCM_READ_RATE:
+            case SOUND_PCM_READ_CHANNELS:
+            case SOUND_PCM_READ_BITS:
+            case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
+            case SOUND_PCM_READ_FILTER:
+               SYSCALL_TRACK( pre_mem_write,tst,"ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
+                                arg3,
+                                sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SNDCTL_SEQ_CTRLRATE:
+            case SNDCTL_DSP_SPEED:
+            case SNDCTL_DSP_STEREO:
+            case SNDCTL_DSP_GETBLKSIZE: 
+            case SNDCTL_DSP_CHANNELS:
+            case SOUND_PCM_WRITE_FILTER:
+            case SNDCTL_DSP_SUBDIVIDE:
+            case SNDCTL_DSP_SETFRAGMENT:
+#           if defined(SNDCTL_DSP_GETCHANNELMASK)
+            case SNDCTL_DSP_GETCHANNELMASK:
+#           endif
+#           if defined(SNDCTL_DSP_BIND_CHANNEL)
+            case SNDCTL_DSP_BIND_CHANNEL:
+#           endif
+            case SNDCTL_TMR_TIMEBASE:
+            case SNDCTL_TMR_TEMPO:
+            case SNDCTL_TMR_SOURCE:
+            case SNDCTL_MIDI_PRETIME:
+            case SNDCTL_MIDI_MPUMODE:
+               /* SIOWR ioctls: the kernel both reads the int at argp and
+                  writes the (possibly adjusted) value back, so on success
+                  mark it written as well -- previously the write-back was
+                  never tracked. */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SNDCTL_DSP_GETOSPACE:
+            case SNDCTL_DSP_GETISPACE:
+               SYSCALL_TRACK( pre_mem_write,tst, 
+                                "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                "(SIOR, audio_buf_info))", arg3,
+                                sizeof(audio_buf_info));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(audio_buf_info));
+               break;
+            case SNDCTL_DSP_SETTRIGGER:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* Real Time Clock (/dev/rtc) ioctls */
+#           ifndef GLIBC_2_1
+            case RTC_UIE_ON:
+            case RTC_UIE_OFF:
+            case RTC_AIE_ON:
+            case RTC_AIE_OFF:
+            case RTC_PIE_ON:
+            case RTC_PIE_OFF:
+            case RTC_IRQP_SET:
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_RD_TIME:
+            case RTC_ALM_READ:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_RD_TIME/ALM_READ)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* Bug fix: VG_(is_kerror) must be applied to res; the bare
+                  function pointer is always non-NULL, so the old test
+                  !VG_(is_kerror) was always false. */
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct rtc_time));
+               break;
+            case RTC_ALM_SET:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(RTC_ALM_SET)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_IRQP_READ:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_IRQP_READ)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* Bug fix: call VG_(is_kerror)(res) -- the bare function
+                  pointer used before is always non-NULL. */
+               if(!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+               break;
+#           endif /* GLIBC_2_1 */
+
+#           ifdef BLKGETSIZE
+            case BLKGETSIZE:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(BLKGETSIZE)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+               break;
+#           endif /* BLKGETSIZE */
+
+            /* CD ROM stuff (??)  */
+            case CDROMSUBCHNL:
+                SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMSUBCHNL (cdsc_format, char))",
+                   (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
+                   sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMSUBCHNL)", arg3, 
+                   sizeof(struct cdrom_subchnl));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_subchnl));
+                break;
+            case CDROMREADTOCHDR:
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCHDR)", arg3, 
+                   sizeof(struct cdrom_tochdr));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
+                break;
+            case CDROMREADTOCENTRY:
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
+                 SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCENTRY)", arg3, 
+                    sizeof(struct cdrom_tocentry));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 /* Copy-paste fix: the result is a cdrom_tocentry, not a
+                    cdrom_tochdr (the old size came from the case above). */
+                 if (!VG_(is_kerror)(res) && res == 0)
+                    VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tocentry));
+                 break;
+            case CDROMPLAYMSF:
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMPLAYMSF)", arg3, 
+                    sizeof(struct cdrom_msf));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 break;
+            /* We don't have any specific information on it, so
+               try to do something reasonable based on direction and
+               size bits.  The encoding scheme is described in
+               /usr/include/asm/ioctl.h.  
+
+               According to Simon Hausmann, _IOC_READ means the kernel
+               writes a value to the ioctl value passed from the user
+               space and the other way around with _IOC_WRITE. */
+            default: {
+               UInt dir  = _IOC_DIR(arg2);
+               UInt size = _IOC_SIZE(arg2);
+               if (/* size == 0 || */ dir == _IOC_NONE) {
+                  VG_(message)(Vg_UserMsg, 
+                     "Warning: noted but unhandled ioctl 0x%x"
+                     " with no size/direction hints",
+                     arg2); 
+                  VG_(message)(Vg_UserMsg, 
+                     "   This could cause spurious value errors"
+                     " to appear.");
+                  VG_(message)(Vg_UserMsg, 
+                     "   See README_MISSING_SYSCALL_OR_IOCTL for guidance on"
+                     " writing a proper wrapper." );
+               } else {
+                  if ((dir & _IOC_WRITE) && size > 0)
+                     SYSCALL_TRACK( pre_mem_read,tst, "ioctl(generic)", arg3, size);
+                  if ((dir & _IOC_READ) && size > 0)
+                     SYSCALL_TRACK( pre_mem_write,tst, "ioctl(generic)", arg3, size);
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (size > 0 && (dir & _IOC_READ)
+                   && !VG_(is_kerror)(res) && res == 0
+                   && arg3 != (Addr)NULL)
+                  VG_TRACK( post_mem_write,arg3, size);
+               break;
+            }
+         }
+         break;
+
+      case __NR_kill: /* syscall 37 */
+         /* int kill(pid_t pid, int sig); */
+         MAYBE_PRINTF("kill ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_link: /* syscall 9 */
+         /* int link(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("link ( %p, %p)\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(oldpath)", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(newpath)", arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_lseek: /* syscall 19 */
+         /* off_t lseek(int fildes, off_t offset, int whence); */
+         MAYBE_PRINTF("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR__llseek: /* syscall 140 */
+         /* int _llseek(unsigned int fd, unsigned long offset_high,       
+                        unsigned long  offset_low, 
+                        loff_t * result, unsigned int whence); */
+         MAYBE_PRINTF("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         SYSCALL_TRACK( pre_mem_write, tst, "llseek(result)", arg4, sizeof(loff_t));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg4, sizeof(loff_t) );
+         break;
+
+      case __NR_lstat: /* syscall 107 */
+         /* int lstat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         }
+         break;
+
+#     if defined(__NR_lstat64)
+      case __NR_lstat64: /* syscall 196 */
+         /* int lstat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         }
+         break;
+#     endif
+
+      case __NR_mkdir: /* syscall 39 */
+         /* int mkdir(const char *pathname, mode_t mode); */
+         MAYBE_PRINTF("mkdir ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mkdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_mmap2)
+      case __NR_mmap2: /* syscall 192 */
+         /* My impression is that this is exactly like __NR_mmap 
+            except that all 6 args are passed in regs, rather than in 
+            a memory-block. */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         if (VG_(clo_trace_syscalls)) {
+            UInt arg6 = tst->m_ebp;
+            VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+#     endif
+
+      case __NR_mmap: /* syscall 90 */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         SYSCALL_TRACK( pre_mem_read, tst, "mmap(args)", arg1, 6*sizeof(UInt) );
+         {
+            UInt* arg_block = (UInt*)arg1;
+            UInt arg6;
+            arg1 = arg_block[0];
+            arg2 = arg_block[1];
+            arg3 = arg_block[2];
+            arg4 = arg_block[3];
+            arg5 = arg_block[4];
+            arg6 = arg_block[5];
+            MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+
+      case __NR_mprotect: /* syscall 125 */
+         /* int mprotect(const void *addr, size_t len, int prot); */
+         /* should addr .. addr+len-1 be checked before the call? */
+         MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mprotect_segment( arg1, arg2, arg3 );
+         }
+         break;
+
+      case __NR_munmap: /* syscall 91 */
+         /* int munmap(void *start, size_t length); */
+         /* should start .. start+length-1 be checked before the call? */
+         MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            munmap_segment( arg1, arg2 );
+         }
+         break;
+
+      case __NR_nanosleep: /* syscall 162 */
+         /* int nanosleep(const struct timespec *req, struct timespec *rem); */
+         MAYBE_PRINTF("nanosleep ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "nanosleep(req)", arg1, 
+                                              sizeof(struct timespec) );
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "nanosleep(rem)", arg2, 
+                               sizeof(struct timespec) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Somewhat bogus ... is only written by the kernel if
+            res == -1 && errno == EINTR. */
+         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct timespec) );
+         break;
+
+      case __NR__newselect: /* syscall 142 */
+         /* int select(int n,  
+                       fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
+                       struct timeval *timeout);
+         */
+         MAYBE_PRINTF("newselect ( %d, %p, %p, %p, %p )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(readfds)",   
+                              arg2, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg3 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(writefds)",  
+                              arg3, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg4 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(exceptfds)", 
+                              arg4, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg5 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(timeout)", arg5, 
+                              sizeof(struct timeval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+         
+      case __NR_open: /* syscall 5 */
+         /* int open(const char *pathname, int flags); */
+         MAYBE_PRINTF("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "open(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n",res);
+         break;
+
+      case __NR_pipe: /* syscall 42 */
+         /* int pipe(int filedes[2]); */
+         MAYBE_PRINTF("pipe ( %p ) ...\n", arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "pipe(filedes)", arg1, 2*sizeof(int) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, 2*sizeof(int) );
+         if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res))
+            VG_(printf)("SYSCALL[%d]       pipe --> (rd %d, wr %d)\n", 
+                        VG_(getpid)(), 
+                        ((UInt*)arg1)[0], ((UInt*)arg1)[1] );
+         break;
+
+      case __NR_poll: /* syscall 168 */
+         /* struct pollfd {
+               int fd;           -- file descriptor
+               short events;     -- requested events
+               short revents;    -- returned events
+            };
+           int poll(struct pollfd *ufds, unsigned int nfds, 
+                                         int timeout) 
+         */
+         MAYBE_PRINTF("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
+         /* In fact some parts of this struct should be readable too.
+            This should be fixed properly. */
+         SYSCALL_TRACK( pre_mem_write, tst, "poll(ufds)", 
+                           arg1, arg2 * sizeof(struct pollfd) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            Int i;
+            struct pollfd * arr = (struct pollfd *)arg1;
+            for (i = 0; i < arg2; i++)
+               VG_TRACK( post_mem_write, (Addr)(&arr[i].revents), sizeof(Short) );
+         }
+         break;
+ 
+      case __NR_readlink: /* syscall 85 */
+         /* int readlink(const char *path, char *buf, size_t bufsiz); */
+         MAYBE_PRINTF("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "readlink(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "readlink(buf)", arg2,arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+
+      case __NR_readv: { /* syscall 145 */
+         /* int readv(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "readv(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_write, tst, "readv(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            /* res holds the number of bytes read. */
+            for (i = 0; i < arg3; i++) {
+               Int nReadThisBuf = vec[i].iov_len;
+               if (nReadThisBuf > res) nReadThisBuf = res;
+               VG_TRACK( post_mem_write, (UInt)vec[i].iov_base, nReadThisBuf );
+               res -= nReadThisBuf;
+               if (res < 0) VG_(panic)("readv: res < 0");
+            }
+         }
+         break;
+      }
+
+      case __NR_rename: /* syscall 38 */
+         /* int rename(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("rename ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_rmdir: /* syscall 40 */
+         /* int rmdir(const char *pathname); */
+         MAYBE_PRINTF("rmdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rmdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_sched_setparam: /* syscall 154 */
+         /* int sched_setparam(pid_t pid, const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "sched_setparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_getparam: /* syscall 155 */
+         /* int sched_getparam(pid_t pid, struct sched_param *p); */
+         MAYBE_PRINTF("sched_getparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sched_getparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_yield: /* syscall 158 */
+         /* int sched_yield(void); */
+         MAYBE_PRINTF("sched_yield ()\n" );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_select: /* syscall 82 */
+         /* struct sel_arg_struct {
+              unsigned long n;
+              fd_set *inp, *outp, *exp;
+              struct timeval *tvp;
+            };
+            int old_select(struct sel_arg_struct *arg);
+         */
+         SYSCALL_TRACK( pre_mem_read, tst, "select(args)", arg1, 5*sizeof(UInt) );
+         {
+            UInt* arg_struct = (UInt*)arg1;
+            arg1 = arg_struct[0];
+            arg2 = arg_struct[1];
+            arg3 = arg_struct[2];
+            arg4 = arg_struct[3];
+            arg5 = arg_struct[4];
+
+            MAYBE_PRINTF("select ( %d, %p, %p, %p, %p )\n", 
+                         arg1,arg2,arg3,arg4,arg5);
+            if (arg2 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(readfds)", arg2, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg3 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(writefds)", arg3, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg4 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(exceptfds)", arg4, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg5 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(timeout)", arg5, 
+                                          sizeof(struct timeval) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setitimer: /* syscall 104 */
+         /* setitimer(int which, const struct itimerval *value,
+                                 struct itimerval *ovalue); */
+         MAYBE_PRINTF("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_read,tst, "setitimer(value)", 
+                             arg2, sizeof(struct itimerval) );
+         if (arg3 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write,tst, "setitimer(ovalue)", 
+                             arg3, sizeof(struct itimerval));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg3, sizeof(struct itimerval));
+         }
+         break;
+
+#     if defined(__NR_setfsgid32)
+      case __NR_setfsgid32: /* syscall 216 */
+         /* int setfsgid(uid_t fsgid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setgid32)
+      case __NR_setgid32: /* syscall 214 */
+#     endif
+      case __NR_setgid: /* syscall 46 */
+         /* int setgid(gid_t gid); */
+         MAYBE_PRINTF("setgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setsid: /* syscall 66 */
+         /* pid_t setsid(void); */
+         MAYBE_PRINTF("setsid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setgroups32)
+      case __NR_setgroups32: /* syscall 206 */
+#     endif
+      case __NR_setgroups: /* syscall 81 */
+         /* int setgroups(size_t size, const gid_t *list); */
+         MAYBE_PRINTF("setgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "setgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setpgid: /* syscall 57 */
+         /* int setpgid(pid_t pid, pid_t pgid); */
+         MAYBE_PRINTF("setpgid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setregid32)
+      case __NR_setregid32: /* syscall 204 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid32(?) ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid32)
+      case __NR_setresuid32: /* syscall 208 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setreuid32)
+      case __NR_setreuid32: /* syscall 203 */
+#     endif
+      case __NR_setreuid: /* syscall 70 */
+         /* int setreuid(uid_t ruid, uid_t euid); */
+         MAYBE_PRINTF("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setrlimit: /* syscall 75 */
+         /* int setrlimit (int resource, const struct rlimit *rlim); */
+         MAYBE_PRINTF("setrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "setrlimit(rlim)", arg2, sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setuid32)
+      case __NR_setuid32: /* syscall 213 */
+#     endif
+      case __NR_setuid: /* syscall 23 */
+         /* int setuid(uid_t uid); */
+         MAYBE_PRINTF("setuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_socketcall: /* syscall 102 */
+         /* int socketcall(int call, unsigned long *args); */
+         MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
+         switch (arg1 /* request */) {
+
+            case SYS_SOCKETPAIR:
+               /* int socketpair(int d, int type, int protocol, int sv[2]); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socketpair(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.socketpair(sv)", 
+                                 ((UInt*)arg2)[3], 2*sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[3], 2*sizeof(int) );
+               break;
+
+            case SYS_SOCKET:
+               /* int socket(int domain, int type, int protocol); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socket(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_BIND:
+               /* int bind(int sockfd, struct sockaddr *my_addr, 
+                           int addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.bind(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               pre_mem_read_sockaddr( tst, "socketcall.bind(my_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+               
+            case SYS_LISTEN:
+               /* int listen(int s, int backlog); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.listen(args)", 
+                                 arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_ACCEPT: {
+               /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.accept(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               {
+               Addr addr_p     = ((UInt*)arg2)[1];
+               Addr addrlen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, addr_p, addrlen_p,
+                                       "socketcall.accept(addr)",
+                                       "socketcall.accept(addrlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, addr_p, addrlen_p,
+                                        "socketcall.accept(addrlen_out)" );
+               }
+               break;
+            }
+
+            case SYS_SENDTO:
+               /* int sendto(int s, const void *msg, int len, 
+                             unsigned int flags, 
+                             const struct sockaddr *to, int tolen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(args)", arg2, 
+                                 6*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               pre_mem_read_sockaddr( tst, "socketcall.sendto(to.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SEND:
+               /* int send(int s, const void *msg, size_t len, int flags); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(args)", arg2,
+                                 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_RECVFROM:
+               /* int recvfrom(int s, void *buf, int len, unsigned int flags,
+                               struct sockaddr *from, int *fromlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recvfrom(args)", 
+                                 arg2, 6*sizeof(Addr) );
+               {
+               Addr buf_p      = ((UInt*)arg2)[1];
+               Int  len        = ((UInt*)arg2)[2];
+               Addr from_p     = ((UInt*)arg2)[4];
+               Addr fromlen_p  = ((UInt*)arg2)[5];
+
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recvfrom(buf)", 
+                                             buf_p, len );
+               buf_and_len_pre_check ( tst, from_p, fromlen_p, 
+                                       "socketcall.recvfrom(from)",
+                                       "socketcall.recvfrom(fromlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, from_p, fromlen_p,
+                                        "socketcall.recvfrom(fromlen_out)" );
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, buf_p, len );
+               }
+               break;
+
+            case SYS_RECV:
+               /* int recv(int s, void *buf, int len, unsigned int flags); */
+               /* man 2 recv says:
+               The  recv call is normally used only on a connected socket
+               (see connect(2)) and is identical to recvfrom with a  NULL
+               from parameter.
+               */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recv(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recv(buf)", 
+                                 ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res >= 0 
+                                   && ((UInt*)arg2)[1] != (UInt)NULL) {
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               }
+               break;
+
+            case SYS_CONNECT:
+               /* int connect(int sockfd, 
+                              struct sockaddr *serv_addr, int addrlen ); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(serv_addr.sa_family)",
+                                 ((UInt*)arg2)[1], /* serv_addr */
+                                 sizeof (sa_family_t));
+               pre_mem_read_sockaddr( tst,
+                  "socketcall.connect(serv_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SETSOCKOPT:
+               /* int setsockopt(int s, int level, int optname, 
+                                 const void *optval, int optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(optval)",
+                                 ((UInt*)arg2)[3], /* optval */
+                                 ((UInt*)arg2)[4]  /* optlen */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_GETSOCKOPT:
+               /* int setsockopt(int s, int level, int optname, 
+                                 void *optval, socklen_t *optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               {
+               Addr optval_p  = ((UInt*)arg2)[3];
+               Addr optlen_p  = ((UInt*)arg2)[4];
+               /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
+               buf_and_len_pre_check ( tst, optval_p, optlen_p,
+                                       "socketcall.getsockopt(optval)",
+                                       "socketcall.getsockopt(optlen)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, optval_p, optlen_p,
+                                        "socketcall.getsockopt(optlen_out)" );
+               }
+               break;
+
+            case SYS_GETSOCKNAME:
+               /* int getsockname(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockname(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getsockname(name)",
+                                       "socketcall.getsockname(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getsockname(namelen_out)" );
+               }
+               break;
+
+            case SYS_GETPEERNAME:
+               /* int getpeername(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getpeername(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getpeername(name)",
+                                       "socketcall.getpeername(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getpeername(namelen_out)" );
+               }
+               break;
+
+            case SYS_SHUTDOWN:
+               /* int shutdown(int s, int how); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.shutdown(args)", 
+                                            arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SENDMSG:
+               {
+                  /* int sendmsg(int s, const struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.sendmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_read_sendmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+
+            case SYS_RECVMSG:
+               {
+                  /* int recvmsg(int s, struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.recvmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_write_recvmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)( res ) )
+                     msghdr_foreachfield( tst, msg, post_mem_write_recvmsg );
+
+                  break;
+               }
+
+            default:
+               VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         break;
+
+      case __NR_stat: /* syscall 106 */
+         /* int stat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("stat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_statfs: /* syscall 99 */
+         /* int statfs(const char *path, struct statfs *buf); */
+         MAYBE_PRINTF("statfs ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "statfs(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      case __NR_symlink: /* syscall 83 */
+         /* int symlink(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("symlink ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+
+#     if defined(__NR_stat64)
+      case __NR_stat64: /* syscall 195 */
+         /* int stat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("stat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+#     if defined(__NR_fstat64)
+      case __NR_fstat64: /* syscall 197 */
+         /* int fstat64(int filedes, struct stat64 *buf); */
+         MAYBE_PRINTF("fstat64 ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+      case __NR_sysinfo: /* syscall 116 */
+         /* int sysinfo(struct sysinfo *info); */
+         MAYBE_PRINTF("sysinfo ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "sysinfo(info)", arg1, sizeof(struct sysinfo) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct sysinfo) );
+         break;
+
+      case __NR_time: /* syscall 13 */
+         /* time_t time(time_t *t); */
+         MAYBE_PRINTF("time ( %p )\n",arg1);
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_write, tst, "time", arg1, sizeof(time_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(time_t) );
+         }
+         break;
+
+      case __NR_times: /* syscall 43 */
+         /* clock_t times(struct tms *buf); */
+         MAYBE_PRINTF("times ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "times(buf)", arg1, sizeof(struct tms) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct tms) );
+         }
+         break;
+
+      case __NR_truncate: /* syscall 92 */
+         /* int truncate(const char *path, size_t length); */
+         MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_umask: /* syscall 60 */
+         /* mode_t umask(mode_t mask); */
+         MAYBE_PRINTF("umask ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_unlink: /* syscall 10 */
+         /* int unlink(const char *pathname) */
+         MAYBE_PRINTF("ulink ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "unlink(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_uname: /* syscall 122 */
+         /* int uname(struct utsname *buf); */
+         MAYBE_PRINTF("uname ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "uname(buf)", arg1, sizeof(struct utsname) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct utsname) );
+         }
+         break;
+
+      case __NR_utime: /* syscall 30 */
+         /* int utime(const char *filename, struct utimbuf *buf); */
+         MAYBE_PRINTF("utime ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "utime(filename)", arg1 );
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "utime(buf)", arg2, 
+                                                 sizeof(struct utimbuf) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_wait4: /* syscall 114 */
+         /* pid_t wait4(pid_t pid, int *status, int options,
+                        struct rusage *rusage) */
+         MAYBE_PRINTF("wait4 ( %d, %p, %d, %p )\n",
+                      arg1,arg2,arg3,arg4);
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(status)", arg2, sizeof(int) );
+         if (arg4 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(rusage)", arg4, 
+                              sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            if (arg2 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg2, sizeof(int) );
+            if (arg4 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg4, sizeof(struct rusage) );
+         }
+         break;
+
+      case __NR_writev: { /* syscall 146 */
+         /* int writev(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "writev(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_read, tst, "writev(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+      }
+
+      /*-------------------------- SIGNALS --------------------------*/
+
+      /* Normally set to 1, so that Valgrind's signal-simulation machinery
+         is engaged.  Sometimes useful to disable (set to 0), for
+         debugging purposes, to make clients more deterministic. */
+#     define SIGNAL_SIMULATION 1
+
+      case __NR_sigaltstack: /* syscall 186 */
+         /* int sigaltstack(const stack_t *ss, stack_t *oss); */
+         MAYBE_PRINTF("sigaltstack ( %p, %p )\n",arg1,arg2);
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaltstack(ss)", 
+                              arg1, sizeof(vki_kstack_t) );
+         }
+         if (arg2 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaltstack(ss)", 
+                              arg1, sizeof(vki_kstack_t) );
+         }
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigaltstack) (tid);
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(vki_kstack_t));
+         break;
+
+      case __NR_rt_sigaction:
+      case __NR_sigaction:
+         /* int sigaction(int signum, struct k_sigaction *act, 
+                                      struct k_sigaction *oldact); */
+         MAYBE_PRINTF("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaction(act)", 
+                              arg2, sizeof(vki_ksigaction));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaction(oldact)", 
+                              arg3, sizeof(vki_ksigaction));
+         /* We do this one ourselves! */
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigaction)(tid);
+         res = tst->m_eax;
+#        else
+         /* debugging signals; when we don't handle them. */
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigaction));
+         break;
+
+      case __NR_rt_sigprocmask:
+      case __NR_sigprocmask:
+         /* int sigprocmask(int how, k_sigset_t *set, 
+                                     k_sigset_t *oldset); */
+         MAYBE_PRINTF("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigprocmask(set)", 
+                              arg2, sizeof(vki_ksigset_t));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigprocmask(oldset)", 
+                              arg3, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigprocmask) ( tid, 
+                                   arg1 /*how*/, 
+                                   (vki_ksigset_t*) arg2,
+                                   (vki_ksigset_t*) arg3 );
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigset_t));
+         break;
+      case __NR_sigpending: /* syscall 73 */
+#     if defined(__NR_rt_sigpending)
+      case __NR_rt_sigpending: /* syscall 176 */
+#     endif
+         /* int sigpending( sigset_t *set ) ; */
+         MAYBE_PRINTF( "sigpending ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sigpending(set)", 
+                           arg1, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do_sigpending)( tid, (vki_ksigset_t*)arg1 );
+         res = 0;
+	 SET_EAX(tid, res);
+#        else
+         KERNEL_DO_SYSCALL(tid, res);
+#        endif
+         if ( !VG_( is_kerror )( res ) && res == 0 )
+            VG_TRACK( post_mem_write, arg1, sizeof( vki_ksigset_t ) ) ;
+         break ;
+
+      default:
+         VG_(message)
+            (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno);
+         VG_(message)
+            (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
+         VG_(message)
+            (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+         VG_(unimplemented)("no wrapper for the above system call");
+         vg_assert(3+3 == 7);
+         break; /*NOTREACHED*/
+   }
+
+   /* { void zzzmemscan(void); zzzmemscan(); } */
+
+   /* Do any post-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+
+/* Perform pre-actions for a blocking syscall, but do not do the
+   syscall itself.
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+*/
+void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+   void*        pre_res = 0;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       read ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "read(buf)", arg2, arg3 );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       write ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "write(buf)", arg2, arg3 );
+         break;
+
+      default:
+         VG_(printf)("pre_known_blocking_syscall: unexpected %d\n", syscallno);
+         VG_(panic)("pre_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+   VGP_POPCC(VgpCoreSysWrap);
+
+   return pre_res;      /* 0 if SK_(pre_syscall)() not called */
+}
+
+
+/* Perform post-actions for a blocking syscall, but do not do the
+   syscall itself.  
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+*/
+void VG_(post_known_blocking_syscall) ( ThreadId tid,
+                                        Int syscallno,
+                                        void* pre_res,
+                                        Int res )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       read ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       write ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         break;
+
+      default:
+         VG_(printf)("post_known_blocking_syscall: unexpected %d\n", 
+                     syscallno);
+         VG_(panic)("post_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                         vg_syscall_mem.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index 179c059..0447d8f 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -40,12 +40,12 @@
 #define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
 #define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 #define newTemp   VG_(getNewTemp)
 #define uLiteral  VG_(setLiteralField)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Here so it can be inlined everywhere.                ---*/
@@ -66,21 +66,6 @@
    return SHADOW(t);
 }
 
-/* Handy predicates. */
-#define SMC_IF_SOME(cb)                              \
-   do {                                              \
-      if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) {   \
-           LAST_UINSTR((cb)).smc_check = True;       \
-      }                                              \
-   } while (0)
-
-#define SMC_IF_ALL(cb)                               \
-   do {                                              \
-      if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) {    \
-         LAST_UINSTR((cb)).smc_check = True;         \
-      }                                              \
-   } while (0)
-
 
 /*------------------------------------------------------------*/
 /*--- Helper bits and pieces for deconstructing the        ---*/
@@ -818,7 +803,6 @@
       }
       if (keep) {
          uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-         SMC_IF_ALL(cb);
       }
       if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
@@ -916,7 +900,6 @@
       Int  tmpv = newTemp(cb);
       uInstr2(cb, GET,   size, ArchReg, gregOfRM(rm), TempReg, tmpv);
       uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
       return HI8(pair)+eip0;
@@ -1113,7 +1096,6 @@
       }
       if (gregOfRM(modrm) < 7) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("%s%c $0x%x, %s\n",
@@ -1201,7 +1183,6 @@
       }
       setFlagsFromUOpcode(cb, uopc);
       uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) {
          if (orig_src_tag == Literal)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1321,7 +1302,6 @@
       /* Dump the result back, if non-BT. */
       if (gregOfRM(modrm) != 4 /* BT */) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1512,7 +1492,6 @@
             uInstr1(cb, NOT, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NOT);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("not%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1520,7 +1499,6 @@
             uInstr1(cb, NEG, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NEG);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1595,13 +1573,11 @@
             uInstr1(cb, INC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1650,7 +1626,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	    uLiteral(cb, eip+1);
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1680,13 +1655,11 @@
             uInstr1(cb, INC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 2: /* call Ev */
             t3 = newTemp(cb); t4 = newTemp(cb);
@@ -1697,7 +1670,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	         uLiteral(cb, eip+HI8(pair));
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1715,7 +1687,6 @@
 	    uLiteral(cb, sz);
             uInstr2(cb, PUT,    4, TempReg, t3,    ArchReg, R_ESP);
             uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t3);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1864,7 +1835,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -1912,7 +1882,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -1996,7 +1965,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -2032,7 +2000,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -2269,7 +2236,6 @@
                Lit16, 
                (((UShort)first_byte) << 8) | ((UShort)second_byte),
                TempReg, ta);
-   if (is_write) SMC_IF_ALL(cb);
    if (dis) {
       if (is_write)
          VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n",
@@ -2485,7 +2451,13 @@
                return dis_fpu_mem(cb, 8, rd, eip, first_byte); 
             case 2: /* FST double-real */
             case 3: /* FSTP double-real */
-               return dis_fpu_mem(cb, 8, wr, eip, first_byte); 
+               return dis_fpu_mem(cb, 8, wr, eip, first_byte);
+            case 4: /* FRSTOR */
+               return dis_fpu_mem(cb, 108, rd, eip, first_byte);
+            case 6: /* FSAVE */
+               return dis_fpu_mem(cb, 108, wr, eip, first_byte);
+            case 7: /* FSTSW */
+               return dis_fpu_mem(cb, 2, wr, eip, first_byte);
             default: 
                goto unhandled;
          }
@@ -2585,7 +2557,6 @@
       uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty);
       uInstr1(cb, POP,   sz, TempReg, t);
       uInstr2(cb, STORE, sz, TempReg, t,      TempReg, ta);
-      SMC_IF_ALL(cb);
       if (dis)
          VG_(printf)("shld%c %%cl, %s, %s\n",
                      nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 
@@ -3010,7 +2981,6 @@
       uInstr2(cb,  ADD, sz, TempReg, tmpd, TempReg, tmpt);
       setFlagsFromUOpcode(cb, ADD);
       uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm));
       if (dis)
          VG_(printf)("xadd%c %s, %s\n", nameISize(sz), 
@@ -3167,7 +3137,6 @@
          uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t2);
 	 uLiteral(cb, eip);
          uInstr2(cb, STORE, 4, TempReg, t2,    TempReg, t1);
-         SMC_IF_ALL(cb);
          uInstr1(cb, JMP,   0, Literal, 0);
 	 uLiteral(cb, d32);
          uCond(cb, CondAlways);
@@ -3472,7 +3441,6 @@
       uInstr2(cb, MOV,    4, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t2);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), 
                            nameIReg(sz,R_EAX), d32);
       break;
@@ -3535,7 +3503,6 @@
          uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1);
 	 uLiteral(cb, d32);
          uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-         SMC_IF_SOME(cb);
          if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
       }
       break;
@@ -3556,6 +3523,10 @@
       eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" );
       break;
 
+   case 0x15: /* ADC Iv, eAX */
+      eip = dis_op_imm_A(cb, sz, ADC, True, eip, "adc" );
+      break;
+
    case 0x1C: /* SBB Ib, AL */
       eip = dis_op_imm_A(cb, 1, SBB, True, eip, "sbb" );
       break;
@@ -3718,40 +3689,7 @@
    case 0x5D: /* POP eBP */
    case 0x5E: /* POP eSI */
    case 0x5F: /* POP eDI */
-    { Int   n_pops;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_pop_case;
-      if (VG_(clo_cachesim)) goto normal_pop_case;
-      /* eip points at first pop insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x58 || ch > 0x5F || ch == 0x5C) break;
-         eipE++;
-      }
-      n_pops = eipE - eipS + 1;
-      if (0 && n_pops > 1) VG_(printf)("%d pops\n", n_pops);
-      t1 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-	 uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t3);
-         uInstr2(cb, PUT,  4, TempReg, t3, ArchReg, ch-0x58);
-         uInstr2(cb, ADD,  4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("popl %s\n", nameIReg(4,ch-0x58));
-      }
-      uInstr2(cb, PUT,    4, TempReg, t1,       ArchReg, R_ESP);
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x5C: /* POP eSP */
-   normal_pop_case:
       t1 = newTemp(cb); t2 = newTemp(cb);
       uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t2);
       uInstr2(cb, LOAD,  sz, TempReg, t2,       TempReg, t1);
@@ -3863,43 +3801,7 @@
    case 0x55: /* PUSH eBP */
    case 0x56: /* PUSH eSI */
    case 0x57: /* PUSH eDI */
-    { Int   n_pushes;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_push_case;
-      if (VG_(clo_cachesim)) goto normal_push_case;
-      /* eip points at first push insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x50 || ch > 0x57 || ch == 0x54) break;
-         eipE++;
-      }
-      n_pushes = eipE - eipS + 1;
-      if (0 && n_pushes > 1) VG_(printf)("%d pushes\n", n_pushes);
-      t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      uInstr2(cb, MOV,    4, TempReg, t1,       TempReg, t2);
-      uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t2);
-      uLiteral(cb, 4 * n_pushes);
-      uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-         uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         uInstr2(cb, GET, 4, ArchReg, ch-0x50, TempReg, t3);
-	 uInstr2(cb, STORE, 4, TempReg, t3, TempReg, t1);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("pushl %s\n", nameIReg(4,ch-0x50));
-      }
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x54: /* PUSH eSP */
-   normal_push_case:
       /* This is the Right Way, in that the value to be pushed is
          established before %esp is changed, so that pushl %esp
          correctly pushes the old value. */
@@ -3911,7 +3813,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
       break;
@@ -3931,7 +3832,6 @@
       uInstr2(cb, MOV,   sz, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t2,    TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c $0x%x\n", nameISize(sz), d32);
       break;
@@ -3948,7 +3848,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("pushf%c\n", nameISize(sz));
       break;
@@ -3980,20 +3879,17 @@
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       /* Push old value of %esp */
       uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
       uLiteral(cb, sz);
       uInstr2(cb, STORE, sz, TempReg,  t3, TempReg, t2);
-      SMC_IF_ALL(cb);
       /* Do %ebp, %esi, %edi */
       for (reg = 5; reg <= 7; reg++) {
          uInstr2(cb, GET,   sz, ArchReg, reg, TempReg, t1);
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("pusha%c\n", nameISize(sz));
@@ -4149,7 +4045,6 @@
          uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1);
          uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2);
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3);
-         SMC_IF_SOME(cb);
          uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm));
          eip += HI8(pair);
          if (dis)
@@ -4231,6 +4126,14 @@
       eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 );
       break;
 
+   case 0xD2: /* Grp2 CL,Eb */
+      modrm = getUChar(eip);
+      am_sz = lengthAMode(eip);
+      d_sz  = 0;
+      sz    = 1;
+      eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX );
+      break;
+
    case 0xD3: /* Grp2 CL,Ev */
       modrm = getUChar(eip);
       am_sz = lengthAMode(eip);
@@ -4499,7 +4402,6 @@
             uCond(cb, (Condcode)(opc-0x90));
             uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis) VG_(printf)("set%s %s\n", 
                                  VG_(nameCondcode)(opc-0x90), 
                                  dis_buf);
@@ -4568,10 +4470,11 @@
    if (dis)
       VG_(printf)("\n");
    for (; first_uinstr < cb->used; first_uinstr++) {
-      Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]);
-      if (dis || !sane) 
-         VG_(ppUInstr)(sane ? first_uinstr : -1,
-                       &cb->instrs[first_uinstr]);
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[first_uinstr]);
+      if (dis) 
+         VG_(ppUInstr)(first_uinstr, &cb->instrs[first_uinstr]);
+      else if (!sane)
+         VG_(ppUInstr)(-1, &cb->instrs[first_uinstr]);
       vg_assert(sane);
    }
 
@@ -4588,28 +4491,17 @@
    Addr eip   = eip0;
    Bool isEnd = False;
    Bool block_sane;
-   Int INCEIP_allowed_lag = 4;
    Int delta = 0;
 
-   if (dis) VG_(printf)("\n");
+   if (dis) VG_(printf)("Original x86 code to UCode:\n\n");
 
-   /* When cache simulating, to ensure cache misses are attributed to the
-    * correct line we ensure EIP is always correct.   This is done by:
+   /* After every x86 instruction do an INCEIP, except for the final one
+    * in the basic block.  For them we patch in the x86 instruction size 
+    * into the `extra4b' field of the basic-block-ending JMP. 
     *
-    * a) Using eager INCEIP updating to cope with all instructions except those
-    *    at the end of a basic block.
-    *
-    * b) Patching in the size of the original x86 instr in the `extra4b' field
-    *    of JMPs at the end of a basic block.  Two cases:
-    *       - Jcond followed by Juncond:  patch the Jcond
-    *       - Juncond alone:              patch the Juncond
-    *
-    * See vg_cachesim_instrument() for how this is used. 
+    * The INCEIPs and JMP.extra4b fields allows a skin to track x86
+    * instruction sizes, important for some skins (eg. cache simulation).
     */
-   if (VG_(clo_cachesim)) {
-       INCEIP_allowed_lag = 0;
-   }
-
    if (VG_(clo_single_step)) {
       eip = disInstr ( cb, eip, &isEnd );
 
@@ -4620,15 +4512,17 @@
          uInstr1(cb, JMP, 0, Literal, 0);
          uLiteral(cb, eip);
          uCond(cb, CondAlways);
+         /* Print added JMP */
          if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
       }
+      if (dis) VG_(printf)("\n");
       delta = eip - eip0;
 
    } else {
       Addr eip2;
       while (!isEnd) {
          eip2 = disInstr ( cb, eip, &isEnd );
-         delta += (eip2 - eip);
+         delta = (eip2 - eip);
          eip = eip2;
          /* Split up giant basic blocks into pieces, so the
             translations fall within 64k. */
@@ -4639,27 +4533,23 @@
             uInstr1(cb, JMP, 0, Literal, 0);
             uLiteral(cb, eip);
             uCond(cb, CondAlways);
+            /* Print added JMP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
             isEnd = True;
 
-         } else if (delta > INCEIP_allowed_lag && !isEnd) {
+         } else if (!isEnd) {
             uInstr1(cb, INCEIP, 0, Lit16, delta);
+            /* Print added INCEIP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
-            delta = 0;
          }
          if (dis) VG_(printf)("\n");
       }
    }
-   if (VG_(clo_cachesim)) {
-      /* Patch instruction size into earliest JMP. */
-      if (cb->used >= 2 && JMP == cb->instrs[cb->used - 2].opcode) {
-         cb->instrs[cb->used - 2].extra4b = delta;
-      } else {
-         LAST_UINSTR(cb).extra4b = delta;
-      }
-   }
 
-   block_sane = VG_(saneUCodeBlock)(cb);
+   /* Patch instruction size into final JMP. */
+   LAST_UINSTR(cb).extra4b = delta;
+
+   block_sane = VG_(saneUCodeBlockCalls)(cb);
    if (!block_sane) {
       VG_(ppUCodeBlock)(cb, "block failing sanity check");
       vg_assert(block_sane);
@@ -4668,6 +4558,7 @@
    return eip - eip0;
 }
 
+#undef dis
 
 /*--------------------------------------------------------------------*/
 /*--- end                                            vg_to_ucode.c ---*/
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 68d9faf..cd52c65 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -26,79 +26,20 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-
 /*------------------------------------------------------------*/
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
-#define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
-#define uLiteral  VG_(setLiteralField)
-#define newTemp   VG_(getNewTemp)
-#define newShadow VG_(getNewShadow)
 
-
-/*------------------------------------------------------------*/
-/*--- Memory management for the translater.                ---*/
-/*------------------------------------------------------------*/
-
-#define N_JITBLOCKS    4
-#define N_JITBLOCK_SZ  5000
-
-static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
-static Bool  jitstorage_inuse[N_JITBLOCKS];
-static Bool  jitstorage_initdone = False;
-
-static __inline__ void jitstorage_initialise ( void )
-{
-   Int i;
-   if (jitstorage_initdone) return;
-   jitstorage_initdone = True;
-   for (i = 0; i < N_JITBLOCKS; i++)
-      jitstorage_inuse[i] = False; 
-}
-
-void* VG_(jitmalloc) ( Int nbytes )
-{
-   Int i;
-   jitstorage_initialise();
-   if (nbytes > N_JITBLOCK_SZ) {
-      /* VG_(printf)("too large: %d\n", nbytes); */
-      return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-   }
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (!jitstorage_inuse[i]) {
-         jitstorage_inuse[i] = True;
-         /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
-         return & jitstorage[i][0];
-      }
-   }
-   VG_(panic)("out of slots in vg_jitmalloc\n");
-   return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-}
-
-void VG_(jitfree) ( void* ptr )
-{
-   Int i;
-   jitstorage_initialise();
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (ptr == & jitstorage[i][0]) {
-         vg_assert(jitstorage_inuse[i]);
-         jitstorage_inuse[i] = False;
-         return;
-      }
-   }
-   VG_(free)(VG_AR_PRIVATE, ptr);
-}
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Basics                                               ---*/
@@ -106,7 +47,7 @@
 
 UCodeBlock* VG_(allocCodeBlock) ( void )
 {
-   UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
+   UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
    cb->used = cb->size = cb->nextTemp = 0;
    cb->instrs = NULL;
    return cb;
@@ -115,8 +56,8 @@
 
 void VG_(freeCodeBlock) ( UCodeBlock* cb )
 {
-   if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
-   VG_(free)(VG_AR_PRIVATE, cb);
+   if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
+   VG_(arena_free)(VG_AR_CORE, cb);
 }
 
 
@@ -129,15 +70,15 @@
          vg_assert(cb->size == 0);
          vg_assert(cb->used == 0);
          cb->size = 8;
-         cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
+         cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
       } else {
          Int i;
-         UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, 
+         UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE, 
                                        2 * sizeof(UInstr) * cb->size);
          for (i = 0; i < cb->used; i++)
             instrs2[i] = cb->instrs[i];
          cb->size *= 2;
-         VG_(free)(VG_AR_PRIVATE, cb->instrs);
+         VG_(arena_free)(VG_AR_CORE, cb->instrs);
          cb->instrs = instrs2;
       }
    }
@@ -147,18 +88,20 @@
 
 
 __inline__ 
-void VG_(emptyUInstr) ( UInstr* u )
+void VG_(newNOP) ( UInstr* u )
 {
    u->val1 = u->val2 = u->val3 = 0;
    u->tag1 = u->tag2 = u->tag3 = NoValue;
    u->flags_r = u->flags_w = FlagsEmpty;
    u->jmpkind = JmpBoring;
-   u->smc_check = u->signed_widen = False;
+   u->signed_widen = u->has_ret_val = False;
+   u->regs_live_after = ALL_RREGS_LIVE;
    u->lit32    = 0;
-   u->opcode   = 0;
+   u->opcode   = NOP;
    u->size     = 0;
    u->cond     = 0;
    u->extra4b  = 0;
+   u->argc = u->regparms_n = 0;
 }
 
 
@@ -174,7 +117,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->val3   = val3;
@@ -198,7 +141,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->opcode = opcode;
@@ -218,7 +161,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->opcode = opcode;
    ui->tag1   = tag1;
@@ -234,7 +177,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->opcode = opcode;
    ui->size   = sz;
 }
@@ -252,13 +195,16 @@
 static __inline__ 
 void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
 {
-   dst->cond          = src->cond;
-   dst->extra4b       = src->extra4b;
-   dst->smc_check     = src->smc_check;
-   dst->signed_widen  = src->signed_widen;
-   dst->jmpkind       = src->jmpkind;
-   dst->flags_r       = src->flags_r;
-   dst->flags_w       = src->flags_w;
+   dst->cond            = src->cond;
+   dst->extra4b         = src->extra4b;
+   dst->signed_widen    = src->signed_widen;
+   dst->jmpkind         = src->jmpkind;
+   dst->flags_r         = src->flags_r;
+   dst->flags_w         = src->flags_w;
+   dst->argc            = src->argc;
+   dst->regparms_n      = src->regparms_n;
+   dst->has_ret_val     = src->has_ret_val;
+   dst->regs_live_after = src->regs_live_after;
 }
 
 
@@ -280,44 +226,85 @@
 }
 
 
+/* Set the C call info fields of the most recent uinsn. */
+void  VG_(setCCallFields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
+                            regparms_n, Bool has_ret_val )
+{
+   vg_assert(argc       <  4);
+   vg_assert(regparms_n <= argc);
+   LAST_UINSTR(cb).lit32       = fn;
+   LAST_UINSTR(cb).argc        = argc;
+   LAST_UINSTR(cb).regparms_n  = regparms_n;
+   LAST_UINSTR(cb).has_ret_val = has_ret_val;
+}
+
 Bool VG_(anyFlagUse) ( UInstr* u )
 {
    return (u->flags_r != FlagsEmpty 
            || u->flags_w != FlagsEmpty);
 }
 
-
-
+#if 1
+#  define BEST_ALLOC_ORDER
+#endif
 
 /* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
    register number.  This effectively defines the order in which real
    registers are allocated.  %ebp is excluded since it is permanently
-   reserved for pointing at VG_(baseBlock).  %edi is a general spare
-   temp used for Left4 and various misc tag ops.
+   reserved for pointing at VG_(baseBlock).
 
-   Important!  If you change the set of allocatable registers from
-   %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in various places to match!  
+   Important!  This function must correspond with the value of
+   VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
+   a problem, except the generated code will obviously be worse).
 */
-__inline__ Int VG_(rankToRealRegNo) ( Int rank )
+__inline__ 
+Int VG_(rankToRealRegNum) ( Int rank )
 {
    switch (rank) {
-#     if 1
+#     ifdef BEST_ALLOC_ORDER
       /* Probably the best allocation ordering. */
       case 0: return R_EAX;
       case 1: return R_EBX;
       case 2: return R_ECX;
       case 3: return R_EDX;
       case 4: return R_ESI;
+      case 5: return R_EDI;
 #     else
       /* Contrary; probably the worst.  Helpful for debugging, tho. */
-      case 4: return R_EAX;
-      case 3: return R_EBX;
-      case 2: return R_ECX;
-      case 1: return R_EDX;
-      case 0: return R_ESI;
+      case 5: return R_EAX;
+      case 4: return R_EBX;
+      case 3: return R_ECX;
+      case 2: return R_EDX;
+      case 1: return R_ESI;
+      case 0: return R_EDI;
 #     endif
-      default: VG_(panic)("rankToRealRegNo");
+      default: VG_(panic)("VG_(rankToRealRegNum)");
+   }
+}
+
+/* Convert an Intel register number into a rank in the range 0 ..
+   VG_MAX_REALREGS-1.  See related comments for rankToRealRegNum()
+   above.  */
+__inline__
+Int VG_(realRegNumToRank) ( Int realReg )
+{
+   switch (realReg) {
+#     ifdef BEST_ALLOC_ORDER
+      case R_EAX: return 0;
+      case R_EBX: return 1;
+      case R_ECX: return 2;
+      case R_EDX: return 3;
+      case R_ESI: return 4;
+      case R_EDI: return 5;
+#     else
+      case R_EAX: return 5;
+      case R_EBX: return 4;
+      case R_ECX: return 3;
+      case R_EDX: return 2;
+      case R_ESI: return 1;
+      case R_EDI: return 0;
+#     endif
+      default: VG_(panic)("VG_(realRegNumToRank)");
    }
 }
 
@@ -382,78 +369,62 @@
    from the result of register allocation on the ucode efficiently and
    without need of any further RealRegs.
 
-   Restrictions on insns (as generated by the disassembler) are as
-   follows:
-
-      A=ArchReg   S=SpillNo   T=TempReg   L=Literal   R=RealReg
-      N=NoValue
-
-         GETF       T       N       N
-         PUTF       T       N       N
-
-         GET        A,S     T       N
-         PUT        T       A,S     N
-         LOAD       T       T       N
-         STORE      T       T       N
-         MOV        T,L     T       N
-         CMOV       T       T       N
-         WIDEN      T       N       N
-         JMP        T,L     N       N
-         CALLM      L       N       N
-         CALLM_S    N       N       N
-         CALLM_E    N       N       N
-         CCALL_1_0  T       N       N
-         CCALL_2_0  T       T       N
-         PUSH,POP   T       N       N
-         CLEAR      L       N       N
-
-         AND, OR
-                    T       T       N
-
-         ADD, ADC, XOR, SUB, SBB
-                    A,L,T   T       N
-
-         SHL, SHR, SAR, ROL, ROR, RCL, RCR
-                    L,T     T       N
-
-         NOT, NEG, INC, DEC, CC2VAL, BSWAP
-                    T       N       N
-
-         JIFZ       T       L       N
-
-         FPU_R      L       T       N
-         FPU_W      L       T       N
-         FPU        L       T       N
-
-         LEA1       T       T   (const in a seperate field)
-         LEA2       T       T       T   (const & shift ditto)
-
-         INCEIP     L       N       N
+   Restrictions for the individual UInstrs are clear from the checks below.
+   Abbreviations: A=ArchReg   S=SpillNo   T=TempReg   L=Literal
+                  Ls=Lit16    R=RealReg   N=NoValue
  
-   and for instrumentation insns:
-
-         LOADV      T       T       N
-         STOREV     T,L     T       N
-         GETV       A       T       N
-         PUTV       T,L     A       N
-         GETVF      T       N       N
-         PUTVF      T       N       N
-         WIDENV     T       N       N
-         TESTV      A,T     N       N
-         SETV       A,T     N       N
-         TAG1       T       N       N
-         TAG2       T       T       N
-
    Before register allocation, S operands should not appear anywhere.
    After register allocation, all T operands should have been
    converted into Rs, and S operands are allowed in GET and PUT --
    denoting spill saves/restores.  
 
+   Before liveness analysis, save_e[acd]x fields should all be True.
+   Afterwards, they may be False.
+
    The size field should be 0 for insns for which it is meaningless,
    ie those which do not directly move/operate on data.
 */
-Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
+Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
 {
+#  define LIT0 (u->lit32 == 0)
+#  define LIT1 (!(LIT0))
+#  define LITm (u->tag1 == Literal ? True : LIT0 )
+#  define SZ4  (u->size == 4)
+#  define SZ2  (u->size == 2)
+#  define SZ1  (u->size == 1)
+#  define SZ0  (u->size == 0)
+#  define SZ42 (u->size == 4 || u->size == 2)
+#  define SZi  (u->size == 4 || u->size == 2 || u->size == 1)
+#  define SZf  (  u->size ==  4 || u->size ==  8 || u->size ==   2     \
+               || u->size == 10 || u->size == 28 || u->size == 108)
+#  define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
+                      ? (u->size == 4) : True)
+
+/* For these ones, two cases:
+ *
+ * 1. They are transliterations of the corresponding x86 instruction, in
+ *    which case they should have that instruction's flags (except that redundant write
+ *    flags can be annulled by the optimisation pass).
+ *
+ * 2. They are being used generally for other purposes, eg. helping with a
+ *    'rep'-prefixed instruction, in which case they should have empty flags.
+ */
+#  define emptyR (u->flags_r == FlagsEmpty)
+#  define emptyW (u->flags_w == FlagsEmpty)
+#  define CC0 (emptyR && emptyW)
+#  define CCr (u->flags_r == FlagsALL && emptyW)
+#  define CCw (emptyR &&  u->flags_w == FlagsALL)
+#  define CCa (emptyR && (u->flags_w == FlagsOSZACP  || emptyW))
+#  define CCc (emptyR && (u->flags_w == FlagsOC      || emptyW))
+#  define CCe (emptyR && (u->flags_w == FlagsOSZAP   || emptyW))
+#  define CCb ((u->flags_r==FlagC       || emptyR) && \
+               (u->flags_w==FlagsOSZACP || emptyW))
+#  define CCd ((u->flags_r==FlagC   || emptyR) && \
+               (u->flags_w==FlagsOC || emptyW))
+#  define CCf (CC0 || CCr || CCw)
+#  define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
+#  define CCj (u->cond==CondAlways ? CC0 : CCg)
+
 #  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
 #  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
 #  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
@@ -466,24 +437,29 @@
 #  define L2  (u->tag2 == Literal && u->val2 == 0)
 #  define Ls1 (u->tag1 == Lit16)
 #  define Ls3 (u->tag3 == Lit16)
+#  define TRL1 (TR1 || L1)
+#  define TRAL1 (TR1 || A1 || L1)
 #  define N1  (u->tag1 == NoValue)
 #  define N2  (u->tag2 == NoValue)
 #  define N3  (u->tag3 == NoValue)
-#  define SZ4 (u->size == 4)
-#  define SZ2 (u->size == 2)
-#  define SZ1 (u->size == 1)
-#  define SZ0 (u->size == 0)
-#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
-#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
-#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
-#  define FLG_RD_WR_MAYBE                                         \
-       ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)    \
-        || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP)   \
-        || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
-#  define CC1 (!(CC0))
-#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
-                      ? (u->size == 4) : True)
 
+#  define COND0    (u->cond         == 0)
+#  define EXTRA4b0 (u->extra4b      == 0)
+#  define SG_WD0   (u->signed_widen == 0)
+#  define JMPKIND0 (u->jmpkind      == 0)
+#  define CCALL0   (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
+                    ( beforeLiveness                                       \
+                    ? u->regs_live_after == ALL_RREGS_LIVE                 \
+                    : True ))
+
+#  define XCONDi   (         EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+#  define Xextra4b (COND0             && SG_WD0 && JMPKIND0 && CCALL0)
+#  define XWIDEN   (COND0                       && JMPKIND0 && CCALL0)
+#  define XJMP     (                     SG_WD0             && CCALL0)
+#  define XCCALL   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0          )
+#  define XOTHER   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+
+   /* 0 or 1 Literal args per UInstr */
    Int n_lits = 0;
    if (u->tag1 == Literal) n_lits++;
    if (u->tag2 == Literal) n_lits++;
@@ -491,94 +467,94 @@
    if (n_lits > 1) 
       return False;
 
+   /* Fields not checked: val1, val2, val3 */
+
    switch (u->opcode) {
-      case GETF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
-      case PUTF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
-      case CALLM_S: case CALLM_E:
-         return SZ0 && N1 && N2 && N3;
-      case INCEIP:
-         return SZ0 && CC0 && Ls1 && N2 && N3;
-      case LEA1:
-         return CC0 && TR1 && TR2 && N3 && SZ4;
-      case LEA2:
-         return CC0 && TR1 && TR2 && TR3 && SZ4;
-      case NOP: 
-         return SZ0 && CC0 && N1 && N2 && N3;
-      case GET: 
-         return CC0 && AS1 && TR2 && N3;
-      case PUT: 
-         return CC0 && TR1 && AS2 && N3;
-      case LOAD: case STORE: 
-         return CC0 && TR1 && TR2 && N3;
-      case MOV:
-         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
-      case CMOV:
-         return CC1 && TR1 && TR2 && N3 && SZ4;
-      case JMP: 
-         return (u->cond==CondAlways ? CC0 : CC1)
-                && (TR1 || L1) && N2 && SZ0 && N3;
-      case CLEAR:
-         return CC0 && Ls1 && N2 && SZ0 && N3;
-      case CALLM:
-         return SZ0 && Ls1 && N2 && N3;
-      case CCALL_1_0:
-         return SZ0 && CC0 && TR1 && N2 && N3;
-      case CCALL_2_0:
-         return SZ0 && CC0 && TR1 && TR2 && N3;
-      case PUSH: case POP:
-         return CC0 && TR1 && N2 && N3;
-      case AND: case OR:
-         return TR1 && TR2 && N3;
-      case ADD: case ADC: case XOR: case SUB: case SBB:
-         return (A1 || TR1 || L1) && TR2 && N3;
-      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
-         return       (TR1 || L1) && TR2 && N3;
-      case NOT: case NEG: case INC: case DEC:
-         return        TR1 && N2 && N3;
-      case BSWAP:
-         return TR1 && N2 && N3 && CC0 && SZ4;
-      case CC2VAL: 
-         return CC1 && SZ1 && TR1 && N2 && N3;
-      case JIFZ:
-         return CC0 && SZ4 && TR1 && L2 && N3;
-      case FPU_R:  case FPU_W: 
-         return CC0 && Ls1 && TR2 && N3;
-      case FPU: 
-         return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
-      case LOADV:
-         return CC0 && TR1 && TR2 && N3;
-      case STOREV:
-         return CC0 && (TR1 || L1) && TR2 && N3;
-      case GETV: 
-         return CC0 && A1 && TR2 && N3;
-      case PUTV: 
-         return CC0 && (TR1 || L1) && A2 && N3;
-      case GETVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case PUTVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case WIDEN:
-         return CC0 && TR1 && N2 && N3;
-      case TESTV: 
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case SETV:
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case TAG1:
-         return CC0 && TR1 && N2 && Ls3 && SZ0;
-      case TAG2:
-         return CC0 && TR1 && TR2 && Ls3 && SZ0;
-      default: 
-         VG_(panic)("vg_saneUInstr: unhandled opcode");
+
+   /* Fields checked: lit32   size  flags_r/w tag1   tag2   tag3    (rest) */
+   case NOP:    return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case GETF:   return LIT0 && SZ42 && CCr &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case PUTF:   return LIT0 && SZ42 && CCw &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case GET:    return LIT0 && SZi  && CC0 &&  AS1 && TR2 &&  N3 && XOTHER;
+   case PUT:    return LIT0 && SZi  && CC0 &&  TR1 && AS2 &&  N3 && XOTHER;
+   case LOAD: 
+   case STORE:  return LIT0 && SZi  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case MOV:    return LITm && SZ4m && CC0 && TRL1 && TR2 &&  N3 && XOTHER;
+   case CMOV:   return LIT0 && SZ4  && CCg &&  TR1 && TR2 &&  N3 && XCONDi;
+   case WIDEN:  return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XWIDEN;
+   case JMP:    return LITm && SZ0  && CCj && TRL1 &&  N2 &&  N3 && XJMP;
+   case CALLM:  return LIT0 && SZ0 /*any*/ &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CALLM_S: 
+   case CALLM_E:return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case PUSH: 
+   case POP:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CLEAR:  return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case AND:
+   case OR:     return LIT0 && SZi  && CCa &&  TR1 && TR2 &&  N3 && XOTHER;
+   case ADD:
+   case XOR:
+   case SUB:    return LITm && SZi  && CCa &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SBB:
+   case ADC:    return LITm && SZi  && CCb &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SHL:
+   case SHR:
+   case SAR:    return LITm && SZi  && CCa && TRL1 && TR2 &&  N3 && XOTHER;
+   case ROL:
+   case ROR:    return LITm && SZi  && CCc && TRL1 && TR2 &&  N3 && XOTHER;
+   case RCL:
+   case RCR:    return LITm && SZi  && CCd && TRL1 && TR2 &&  N3 && XOTHER;
+   case NOT:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case NEG:    return LIT0 && SZi  && CCa &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case INC:
+   case DEC:    return LIT0 && SZi  && CCe &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CC2VAL: return LIT0 && SZ1  && CCg &&  TR1 &&  N2 &&  N3 && XCONDi;
+   case BSWAP:  return LIT0 && SZ4  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case JIFZ:   return LIT1 && SZ4  && CC0 &&  TR1 &&  L2 &&  N3 && XOTHER;
+   case FPU_R:  
+   case FPU_W:  return LIT0 && SZf  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
+   case FPU:    return LIT0 && SZ0  && CCf &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case LEA1:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case LEA2:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 && TR3 && Xextra4b;
+   case INCEIP: return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CCALL:  return LIT1 && SZ0  && CC0 && 
+                       (u->argc > 0                   ? TR1 : N1) && 
+                       (u->argc > 1                   ? TR2 : N2) && 
+                       (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
+                       u->regparms_n <= u->argc && XCCALL;
+   default: 
+      if (VG_(needs).extended_UCode)
+         return SK_(saneExtUInstr)(beforeRA, beforeLiveness, u);
+      else {
+         VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                     "VG_(needs).extended_UCode should be set?",
+                     u->opcode);
+         VG_(panic)("VG_(saneUInstr): unhandled opcode");
+      }
    }
-#  undef SZ4_IF_TR1
-#  undef CC0
-#  undef CC1
+#  undef LIT0
+#  undef LIT1
+#  undef LITm
 #  undef SZ4
 #  undef SZ2
 #  undef SZ1
 #  undef SZ0
+#  undef SZ42
+#  undef SZi
+#  undef SZf
+#  undef SZ4m
+#  undef emptyR
+#  undef emptyW
+#  undef CC0
+#  undef CCr
+#  undef CCw
+#  undef CCa
+#  undef CCb
+#  undef CCc
+#  undef CCd
+#  undef CCe
+#  undef CCf
+#  undef CCg
+#  undef CCj
 #  undef TR1
 #  undef TR2
 #  undef TR3
@@ -588,20 +564,42 @@
 #  undef AS2
 #  undef AS3
 #  undef L1
-#  undef Ls1
 #  undef L2
+#  undef Ls1
 #  undef Ls3
+#  undef TRL1
+#  undef TRAL1
 #  undef N1
 #  undef N2
 #  undef N3
-#  undef FLG_RD
-#  undef FLG_WR
-#  undef FLG_RD_WR_MAYBE 
+#  undef COND0
+#  undef EXTRA4b0
+#  undef SG_WD0
+#  undef JMPKIND0
+#  undef CCALL0
+#  undef Xextra4b
+#  undef XWIDEN
+#  undef XJMP
+#  undef XCCALL
+#  undef XOTHER
 }
 
+void VG_(saneUCodeBlock) ( UCodeBlock* cb )
+{
+   Int i;
+        
+   for (i = 0; i < cb->used; i++) {
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
+      if (!sane) {
+         VG_(printf)("Instruction failed sanity check:\n");
+         VG_(upUInstr)(i, &cb->instrs[i]);
+      }
+      vg_assert(sane);
+   }
+}
 
 /* Sanity checks to do with CALLMs in UCodeBlocks. */
-Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
+Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
 {
    Int  callm = 0;
    Int  callm_s = 0;
@@ -687,6 +685,9 @@
 /*--- Printing uinstrs.                                    ---*/
 /*------------------------------------------------------------*/
 
+/* Global that dictates whether to print generated code at all stages */
+Bool VG_(print_codegen);
+
 Char* VG_(nameCondcode) ( Condcode cond )
 {
    switch (cond) {
@@ -734,14 +735,14 @@
 }
 
 
-static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
+void VG_(ppUOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
 {
    UInt tag, val;
    switch (operandNo) {
       case 1: tag = u->tag1; val = u->val1; break;
       case 2: tag = u->tag2; val = u->val2; break;
       case 3: tag = u->tag3; val = u->val3; break;
-      default: VG_(panic)("ppUOperand(1)");
+      default: VG_(panic)("VG_(ppUOperand)(1)");
    }
    if (tag == Literal) val = u->lit32;
 
@@ -754,7 +755,7 @@
       case NoValue: VG_(printf)("NoValue"); break;
       case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
       case SpillNo: VG_(printf)("spill%d", val); break;
-      default: VG_(panic)("ppUOperand(2)");
+      default: VG_(panic)("VG_(ppUOperand)(2)");
    }
    if (parens) VG_(printf)(")");
 }
@@ -786,10 +787,6 @@
    }
    if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
    switch (opc) {
-      case GETVF:   return "GETVF";
-      case PUTVF:   return "PUTVF";
-      case TAG1:    return "TAG1";
-      case TAG2:    return "TAG2";
       case CALLM_S: return "CALLM_S";
       case CALLM_E: return "CALLM_E";
       case INCEIP:  return "INCEIP";
@@ -808,8 +805,7 @@
       case JMP:     return "J"    ;
       case JIFZ:    return "JIFZ" ;
       case CALLM:   return "CALLM";
-      case CCALL_1_0: return "CCALL_1_0";
-      case CCALL_2_0: return "CCALL_2_0";
+      case CCALL:   return "CCALL";
       case PUSH:    return "PUSH" ;
       case POP:     return "POP"  ;
       case CLEAR:   return "CLEAR";
@@ -817,18 +813,61 @@
       case FPU_R:   return "FPU_R";
       case FPU_W:   return "FPU_W";
       case FPU:     return "FPU"  ;
-      case LOADV:   return "LOADV";
-      case STOREV:  return "STOREV";
-      case GETV:    return "GETV";
-      case PUTV:    return "PUTV";
-      case TESTV:   return "TESTV";
-      case SETV:    return "SETV";
-      default:      VG_(panic)("nameUOpcode: unhandled case");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(nameExtUOpcode)(opc);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        opc);
+            VG_(panic)("nameUOpcode: unhandled opcode");
+         }
    }
 }
 
+void ppRealRegsLiveness ( UInstr* u )
+{
+#  define PRINT_RREG_LIVENESS(realReg,s) \
+     VG_(printf)( IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), \
+                               u->regs_live_after)             \
+                     ? s : "-");
 
-void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+   VG_(printf)("[");
+   PRINT_RREG_LIVENESS(R_EAX, "a");
+   PRINT_RREG_LIVENESS(R_EBX, "b");
+   PRINT_RREG_LIVENESS(R_ECX, "c");
+   PRINT_RREG_LIVENESS(R_EDX, "d");
+   PRINT_RREG_LIVENESS(R_ESI, "S");
+   PRINT_RREG_LIVENESS(R_EDI, "D");
+   VG_(printf)("]");
+
+#  undef PRINT_RREG_LIVENESS
+}
+
+/* Ugly-print UInstr :) */
+void VG_(upUInstr) ( Int i, UInstr* u )
+{
+   VG_(ppUInstrWithRegs)(i, u);
+   
+   VG_(printf)("opcode:          %d\n", u->opcode);
+   VG_(printf)("lit32:           %x\n", u->lit32);
+   VG_(printf)("size:            %d\n", u->size);
+   VG_(printf)("val1,val2,val3:  %d, %d, %d\n", u->val1, u->val2, u->val3);
+   VG_(printf)("tag1,tag2,tag3:  %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
+   VG_(printf)("flags_r:         %x\n", u->flags_r);
+   VG_(printf)("flags_w:         %x\n", u->flags_w);
+   VG_(printf)("extra4b:         %x\n", u->extra4b);
+   VG_(printf)("cond:            %x\n", u->cond);
+   VG_(printf)("signed_widen:    %d\n", u->signed_widen);
+   VG_(printf)("jmpkind:         %d\n", u->jmpkind);
+   VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
+   VG_(printf)("has_ret_val:     %d\n", u->has_ret_val);
+   VG_(printf)("regs_live_after: ");
+   ppRealRegsLiveness(u);
+   VG_(printf)("\n");
+}
+
+void ppUInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
 {
    VG_(printf)("\t%4d: %s", instrNo, 
                             VG_(nameUOpcode)(True, u->opcode));
@@ -846,24 +885,6 @@
 
    switch (u->opcode) {
 
-      case TAG1:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" )");
-         break;
-
-      case TAG2:
-         VG_(printf)("\t");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" )");
-         break;
-
       case CALLM_S: case CALLM_E:
          break;
 
@@ -873,18 +894,18 @@
 
       case LEA2:
          VG_(printf)("\t%d(" , u->lit32);
-         ppUOperand(u, 1, 4, False);
+         VG_(ppUOperand)(u, 1, 4, False);
          VG_(printf)(",");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          VG_(printf)(",%d), ", (Int)u->extra4b);
-         ppUOperand(u, 3, 4, False);
+         VG_(ppUOperand)(u, 3, 4, False);
          break;
 
       case LEA1:
          VG_(printf)("\t%d" , u->lit32);
-         ppUOperand(u, 1, 4, True);
+         VG_(ppUOperand)(u, 1, 4, True);
          VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          break;
 
       case NOP:
@@ -893,12 +914,12 @@
       case FPU_W:
          VG_(printf)("\t0x%x:0x%x, ",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          break;
 
       case FPU_R:
          VG_(printf)("\t");
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          VG_(printf)(", 0x%x:0x%x",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
@@ -908,97 +929,93 @@
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
 
-      case STOREV: case LOADV:
       case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); 
+         VG_(ppUOperand)(u, 1, u->size, u->opcode==LOAD); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
+         VG_(ppUOperand)(u, 2, u->size, u->opcode==STORE);
+         break;
+
+      case JMP:
+         switch (u->jmpkind) {
+            case JmpCall:      VG_(printf)("-c"); break;
+            case JmpRet:       VG_(printf)("-r"); break;
+            case JmpSyscall:   VG_(printf)("-sys"); break;
+            case JmpClientReq: VG_(printf)("-cli"); break;
+            default: break;
+         }
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, False);
+         if (CondAlways == u->cond) {
+            /* Print x86 instruction size if filled in */
+            if (0 != u->extra4b)
+               VG_(printf)("  ($%u)", u->extra4b);
+         }
          break;
 
       case GETF: case PUTF:
+      case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case JMP: case CC2VAL:
-      case PUSH: case POP: case CLEAR: case CALLM:
-         if (u->opcode == JMP) {
-            switch (u->jmpkind) {
-               case JmpCall:      VG_(printf)("-c"); break;
-               case JmpRet:       VG_(printf)("-r"); break;
-               case JmpSyscall:   VG_(printf)("-sys"); break;
-               case JmpClientReq: VG_(printf)("-cli"); break;
-               default: break;
-            }
+      /* Print a "(s)" after args passed on stack */
+      case CCALL:
+         VG_(printf)("\t");
+         if (u->has_ret_val) {
+            VG_(ppUOperand)(u, 3, 0, False);
+            VG_(printf)(" = ");
          }
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      case CCALL_1_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
-         break;
-
-      case CCALL_2_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
+         VG_(printf)("%p(", u->lit32);
+         if (u->argc > 0) {
+            VG_(ppUOperand)(u, 1, 0, False);
+            if (u->regparms_n < 1)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 1) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 2, 0, False);
+            if (u->regparms_n < 2)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 2) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 3, 0, False);
+            if (u->regparms_n < 3)
+               VG_(printf)("(s)");
+         }
+         VG_(printf)(") ");
          break;
 
       case JIFZ:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case PUTVF: case GETVF:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 0, False); 
-         break;
-
-      case NOT: case NEG: case INC: case DEC: case BSWAP:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
-         break;
-
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
       case SHL: case SHR: case SAR: 
       case ROL: case ROR: case RCL: case RCR:   
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
+         VG_(ppUOperand)(u, 1, u->size, False); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case GETV: case PUTV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
+         VG_(ppUOperand)(u, 2, u->size, False);
          break;
 
       case WIDEN:
          VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                               u->signed_widen?'s':'z');
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case TESTV: case SETV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      default: VG_(panic)("ppUInstr: unhandled opcode");
+      default: 
+         if (VG_(needs).extended_UCode)
+            SK_(ppExtUInstr)(u);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("ppUInstr: unhandled opcode");
+         }
    }
-
    if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
       VG_(printf)("  (");
       if (u->flags_r != FlagsEmpty) 
@@ -1007,16 +1024,31 @@
          vg_ppFlagSet("-w", u->flags_w);
       VG_(printf)(")");
    }
+
+   if (ppRegsLiveness) {
+      VG_(printf)("\t\t");
+      ppRealRegsLiveness ( u );
+   }
+
    VG_(printf)("\n");
 }
 
+void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
+}
+
+void VG_(ppUInstrWithRegs) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
+}
 
 void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
 {
    Int i;
-   VG_(printf)("\n%s\n", title);
+   VG_(printf)("%s\n", title);
    for (i = 0; i < cb->used; i++)
-      if (0 || cb->instrs[i].opcode != NOP)
+      if (cb->instrs[i].opcode != NOP)
          VG_(ppUInstr) ( i, &cb->instrs[i] );
    VG_(printf)("\n");
 }
@@ -1027,43 +1059,35 @@
 /*--- and code improvement.                                ---*/
 /*------------------------------------------------------------*/
 
-/* A structure for communicating temp uses, and for indicating
-   temp->real register mappings for patchUInstr. */
-typedef
-   struct {
-      Int   realNo;
-      Int   tempNo;
-      Bool  isWrite;
-   }
-   TempUse;
-
-
-/* Get the temp use of a uinstr, parking them in an array supplied by
+/* Get the temp/reg use of a uinstr, parking them in an array supplied by
    the caller, which is assumed to be big enough.  Return the number
    of entries.  Insns which read _and_ write a register wind up
    mentioning it twice.  Entries are placed in the array in program
    order, so that if a reg is read-modified-written, it appears first
-   as a read and then as a write.  
+   as a read and then as a write.  'tag' indicates whether we are looking at
+   TempRegs or RealRegs.
 */
-static __inline__ 
-Int getTempUsage ( UInstr* u, TempUse* arr )
+__inline__
+Int VG_(getRegUsage) ( UInstr* u, Tag tag, RegUse* arr )
 {
-
-#  define RD(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = False; n++; }
-#  define WR(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = True; n++; }
+#  define RD(ono)    VG_UINSTR_READS_REG(ono)
+#  define WR(ono)    VG_UINSTR_WRITES_REG(ono)
 
    Int n = 0;
    switch (u->opcode) {
       case LEA1: RD(1); WR(2); break;
       case LEA2: RD(1); RD(2); WR(3); break;
 
-      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
+      case NOP:   case FPU:   case INCEIP: case CALLM_S: case CALLM_E:
+      case CLEAR: case CALLM: break;
+
+      case CCALL:
+         if (u->argc > 0)    RD(1); 
+         if (u->argc > 1)    RD(2); 
+         if (u->argc > 2)    RD(3); 
+         if (u->has_ret_val) WR(3);
+         break;
+
       case FPU_R: case FPU_W: RD(2); break;
 
       case GETF:  WR(1); break;
@@ -1072,16 +1096,14 @@
       case GET:   WR(2); break;
       case PUT:   RD(1); break;
       case LOAD:  RD(1); WR(2); break;
-      case STORE: case CCALL_2_0: RD(1); RD(2); break;
+      case STORE: RD(1); RD(2); break;
       case MOV:   RD(1); WR(2); break;
 
       case JMP:   RD(1); break;
-      case CLEAR: case CALLM: break;
 
-      case PUSH: case CCALL_1_0: RD(1); break;
+      case PUSH: RD(1); break;
       case POP:  WR(1); break;
 
-      case TAG2:
       case CMOV:
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
@@ -1091,7 +1113,7 @@
       case ROL: case ROR: case RCL: case RCR:
          RD(1); RD(2); WR(2); break;
 
-      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          RD(1); WR(1); break;
 
       case WIDEN: RD(1); WR(1); break;
@@ -1099,19 +1121,15 @@
       case CC2VAL: WR(1); break;
       case JIFZ: RD(1); break;
 
-      /* These sizes are only ever consulted when the instrumentation
-         code is being added, so the following can return
-         manifestly-bogus sizes. */
-      case LOADV:   RD(1); WR(2); break;
-      case STOREV:  RD(1); RD(2); break;
-      case GETV:    WR(2); break;
-      case PUTV:    RD(1); break;
-      case TESTV:   RD(1); break;
-      case SETV:    WR(1); break;
-      case PUTVF:   RD(1); break;
-      case GETVF:   WR(1); break;
-
-      default: VG_(panic)("getTempUsage: unhandled opcode");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(getExtRegUsage)(u, tag, arr);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("VG_(getRegUsage): unhandled opcode");
+         }
    }
    return n;
 
@@ -1120,31 +1138,32 @@
 }
 
 
-/* Change temp regs in u into real regs, as directed by tmap. */
-static __inline__ 
-void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
+/* Change temp regs in u into real regs, as directed by the
+ * temps[i]-->reals[i] mapping. */
+static __inline__
+void patchUInstr ( UInstr* u, RegUse temps[], UInt reals[], Int n_tmap )
 {
    Int i;
    if (u->tag1 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val1) break;
+         if (temps[i].num == u->val1) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(1)");
       u->tag1 = RealReg;
-      u->val1 = tmap[i].realNo;
+      u->val1 = reals[i];
    }
    if (u->tag2 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val2) break;
+         if (temps[i].num == u->val2) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(2)");
       u->tag2 = RealReg;
-      u->val2 = tmap[i].realNo;
+      u->val2 = reals[i];
    }
    if (u->tag3 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val3) break;
+         if (temps[i].num == u->val3) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(3)");
       u->tag3 = RealReg;
-      u->val3 = tmap[i].realNo;
+      u->val3 = reals[i];
    }
 }
 
@@ -1166,7 +1185,9 @@
 
 
 /* If u reads an ArchReg, return the number of the containing arch
-   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
+   reg.  Otherwise return -1.  Used in redundant-PUT elimination.
+   Note that this is not required for skins extending UCode because
+   this happens before instrumentation. */
 static __inline__ 
 Int maybe_uinstrReadsArchReg ( UInstr* u )
 {
@@ -1211,10 +1232,10 @@
 Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
 {
    Int i, k;
-   TempUse tempUse[3];
-   k = getTempUsage ( u, &tempUse[0] );
+   RegUse tempUse[3];
+   k = VG_(getRegUsage) ( u, TempReg, &tempUse[0] );
    for (i = 0; i < k; i++)
-      if (tempUse[i].tempNo == tempreg)
+      if (tempUse[i].num == tempreg)
          return True;
    return False;
 }
@@ -1236,14 +1257,18 @@
    Int     i, j, k, m, n, ar, tr, told, actual_areg;
    Int     areg_map[8];
    Bool    annul_put[8];
-   TempUse tempUse[3];
+   RegUse  tempUse[3];
    UInstr* u;
    Bool    wr;
    Int*    last_live_before;
    FlagSet future_dead_flags;
 
+   if (dis) 
+      VG_(printf) ("Improvements:\n");
+
    if (cb->nextTemp > 0)
-      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
+      last_live_before = VG_(arena_malloc) ( VG_AR_JITTER, 
+                                             cb->nextTemp * sizeof(Int) );
    else
       last_live_before = NULL;
 
@@ -1259,11 +1284,11 @@
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      k = getTempUsage(u, &tempUse[0]);
+      k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
       /* For each temp usage ... bwds in program order. */
       for (j = k-1; j >= 0; j--) {
-         tr = tempUse[j].tempNo;
+         tr = tempUse[j].num;
          wr = tempUse[j].isWrite;
          if (last_live_before[tr] == -1) {
             vg_assert(tr >= 0 && tr < cb->nextTemp);
@@ -1300,15 +1325,14 @@
                out here.  Annul this GET, rename tr to told for the
                rest of the block, and extend told's live range to that
                of tr.  */
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
+            VG_(newNOP)(u);
             n = last_live_before[tr] + 1;
             if (n > cb->used) n = cb->used;
             last_live_before[told] = last_live_before[tr];
             last_live_before[tr] = i-1;
-            if (VG_(disassemble))
+            if (dis)
                VG_(printf)(
-                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
+                  "   at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
                   i, tr, told,i+1, n-1);
             for (m = i+1; m < n; m++) {
                if (cb->instrs[m].tag1 == TempReg 
@@ -1349,9 +1373,9 @@
                case ADC: case SBB:
                case SHL: case SHR: case SAR: case ROL: case ROR:
                case RCL: case RCR:
-                  if (VG_(disassemble)) 
+                  if (dis) 
                      VG_(printf)(
-                        "at %d: change ArchReg %S to TempReg t%d\n", 
+                        "   at %2d: change ArchReg %S to TempReg t%d\n", 
                         i, nameIReg(4,u->val1), areg_map[u->val1]);
                   u->tag1 = TempReg;
                   u->val1 = areg_map[u->val1];
@@ -1366,12 +1390,12 @@
          }
 
          /* boring insn; invalidate any mappings to temps it writes */
-         k = getTempUsage(u, &tempUse[0]);
+         k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
          for (j = 0; j < k; j++) {
             wr  = tempUse[j].isWrite;
             if (!wr) continue;
-            tr = tempUse[j].tempNo;
+            tr = tempUse[j].num;
             for (m = 0; m < 8; m++)
                if (areg_map[m] == tr) areg_map[m] = -1;
          }
@@ -1398,10 +1422,9 @@
          actual_areg = containingArchRegOf ( 4, u->val2 );
          if (annul_put[actual_areg]) {
             vg_assert(actual_areg != R_ESP);
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
-            if (VG_(disassemble)) 
-               VG_(printf)("at %d: delete PUT\n", i );
+            VG_(newNOP)(u);
+            if (dis) 
+               VG_(printf)("   at %2d: delete PUT\n", i );
          } else {
             if (actual_areg != R_ESP)
                annul_put[actual_areg] = True;
@@ -1443,9 +1466,9 @@
       vg_assert(u->tag1 == TempReg);
       vg_assert(u->tag2 == TempReg);
       if (last_live_before[u->val1] == i) {
-         if (VG_(disassemble))
+         if (dis)
             VG_(printf)(
-               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
+               "   at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
                i, u->val2, u->val1, i+1, last_live_before[u->val2] );
          for (j = i+1; j <= last_live_before[u->val2]; j++) {
             if (cb->instrs[j].tag1 == TempReg 
@@ -1457,8 +1480,7 @@
          }
          last_live_before[u->val1] = last_live_before[u->val2];
          last_live_before[u->val2] = i-1;
-         u->opcode = NOP;
-         u->tag1 = u->tag2 = NoValue;
+         VG_(newNOP)(u);
       }
    }
 
@@ -1495,8 +1517,8 @@
          this insn.*/
       if (u->flags_w != FlagsEmpty
           && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
-         if (VG_(disassemble)) {
-            VG_(printf)("at %d: annul flag write ", i);
+         if (dis) {
+            VG_(printf)("   at %2d: annul flag write ", i);
             vg_ppFlagSet("", u->flags_w);
             VG_(printf)(" due to later ");
             vg_ppFlagSet("", future_dead_flags);
@@ -1515,7 +1537,12 @@
    }
 
    if (last_live_before) 
-      VG_(jitfree) ( last_live_before );
+      VG_(arena_free) ( VG_AR_JITTER, last_live_before );
+
+   if (dis) {
+      VG_(printf)("\n");
+      VG_(ppUCodeBlock) ( cb, "Improved UCode:" );
+   }
 }
 
 
@@ -1570,7 +1597,8 @@
    Int          ss_busy_until_before[VG_MAX_SPILLSLOTS];
    Int          i, j, k, m, r, tno, max_ss_no;
    Bool         wr, defer, isRead, spill_reqd;
-   TempUse      tempUse[3];
+   UInt         realUse[3];
+   RegUse       tempUse[3];
    UCodeBlock*  c2;
 
    /* Used to denote ... well, "no value" in this fn. */
@@ -1578,7 +1606,8 @@
 
    /* Initialise the TempReg info.  */
    if (c1->nextTemp > 0)
-      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
+      temp_info = VG_(arena_malloc)(VG_AR_JITTER,
+                                    c1->nextTemp * sizeof(TempInfo) );
    else
       temp_info = NULL;
 
@@ -1594,12 +1623,12 @@
    /* Scan fwds to establish live ranges. */
 
    for (i = 0; i < c1->used; i++) {
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each temp usage ... fwds in program order */
       for (j = 0; j < k; j++) {
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          wr  = tempUse[j].isWrite;
          if (wr) {
             /* Writes hold a reg live until after this insn. */
@@ -1662,26 +1691,30 @@
 
    /* Show live ranges and assigned spill slot nos. */
 
-   if (VG_(disassemble)) {
-      VG_(printf)("Live Range Assignments\n");
+   if (dis) {
+      VG_(printf)("Live range assignments:\n");
 
       for (i = 0; i < c1->nextTemp; i++) {
          if (temp_info[i].live_after == VG_NOTHING) 
             continue;
          VG_(printf)(
-            "   LR %d is   after %d to before %d   spillno %d\n",
+            "   LR %d is  after %d to before %d\tspillno %d\n",
             i,
             temp_info[i].live_after,
             temp_info[i].dead_before,
             temp_info[i].spill_no
          );
       }
+      VG_(printf)("\n");
    }
 
    /* Now that we've established a spill slot number for each used
       temporary, we can go ahead and do the core of the "Second-chance
       binpacking" allocation algorithm. */
 
+   if (dis) VG_(printf)("Register allocated UCode:\n");
+      
+
    /* Resulting code goes here.  We generate it all in a forwards
       pass. */
    c2 = VG_(allocCodeBlock)();
@@ -1694,9 +1727,6 @@
    for (i = 0; i < c1->nextTemp; i++)
       temp_info[i].real_no = VG_NOTHING;
 
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-
    /* Process each insn in turn. */
    for (i = 0; i < c1->used; i++) {
 
@@ -1721,14 +1751,14 @@
       }
 #     endif
 
-      if (VG_(disassemble))
+      if (dis)
          VG_(ppUInstr)(i, &c1->instrs[i]);
 
       /* First, free up enough real regs for this insn.  This may
          generate spill stores since we may have to evict some TempRegs
          currently in real regs.  Also generates spill loads. */
 
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each ***different*** temp mentioned in the insn .... */
@@ -1739,14 +1769,14 @@
             used by the insn once, even if it is mentioned more than
             once. */
          defer = False;
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          for (m = j+1; m < k; m++)
-            if (tempUse[m].tempNo == tno) 
+            if (tempUse[m].num == tno) 
                defer = True;
          if (defer) 
             continue;
 
-         /* Now we're trying to find a register for tempUse[j].tempNo.
+         /* Now we're trying to find a register for tempUse[j].num.
             First of all, if it already has a register assigned, we
             don't need to do anything more. */
          if (temp_info[tno].real_no != VG_NOTHING)
@@ -1772,7 +1802,7 @@
 
             Select r in 0 .. VG_MAX_REALREGS-1 such that
             real_to_temp[r] is not mentioned in 
-            tempUse[0 .. k-1].tempNo, since it would be just plain 
+            tempUse[0 .. k-1].num, since it would be just plain 
             wrong to eject some other TempReg which we need to use in 
             this insn.
 
@@ -1783,7 +1813,7 @@
          for (r = 0; r < VG_MAX_REALREGS; r++) {
             is_spill_cand[r] = True;
             for (m = 0; m < k; m++) {
-               if (real_to_temp[r] == tempUse[m].tempNo) {
+               if (real_to_temp[r] == tempUse[m].num) {
                   is_spill_cand[r] = False;
                   break;
                }
@@ -1834,28 +1864,28 @@
          temp_info[real_to_temp[r]].real_no = VG_NOTHING;
          if (temp_info[real_to_temp[r]].dead_before > i) {
             uInstr2(c2, PUT, 4, 
-                        RealReg, VG_(rankToRealRegNo)(r), 
+                        RealReg, VG_(rankToRealRegNum)(r), 
                         SpillNo, temp_info[real_to_temp[r]].spill_no);
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
          /* Decide if tno is read. */
          isRead = False;
          for (m = 0; m < k; m++)
-            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) 
+            if (tempUse[m].num == tno && !tempUse[m].isWrite) 
                isRead = True;
 
          /* If so, generate a spill load. */
          if (isRead) {
             uInstr2(c2, GET, 4, 
                         SpillNo, temp_info[tno].spill_no, 
-                        RealReg, VG_(rankToRealRegNo)(r) );
+                        RealReg, VG_(rankToRealRegNum)(r) );
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
@@ -1869,19 +1899,18 @@
          and use patchUInstr to convert its rTempRegs into
          realregs. */
       for (j = 0; j < k; j++)
-         tempUse[j].realNo 
-            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
+         realUse[j] = VG_(rankToRealRegNum)(temp_info[tempUse[j].num].real_no);
       VG_(copyUInstr)(c2, &c1->instrs[i]);
-      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
+      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
 
-      if (VG_(disassemble)) {
+      if (dis) {
          VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          VG_(printf)("\n");
       }
    }
 
    if (temp_info != NULL)
-      VG_(jitfree)(temp_info);
+      VG_(arena_free)(VG_AR_JITTER, temp_info);
 
    VG_(freeCodeBlock)(c1);
 
@@ -1893,1170 +1922,37 @@
 #  undef VG_NOTHING
 
 }
+extern void fooble(int);
+/* Analysis records liveness of all general-use RealRegs in the UCode. */
+static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
+{        
+   Int      i, j, k;
+   RRegSet  rregs_live;
+   RegUse   regUse[3];
+   UInstr*  u;
 
-
-/*------------------------------------------------------------*/
-/*--- New instrumentation machinery.                       ---*/
-/*------------------------------------------------------------*/
-
-static
-VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveOR4_TQ;
-      case 2: return VgT_ImproveOR2_TQ;
-      case 1: return VgT_ImproveOR1_TQ;
-      default: VG_(panic)("get_VgT_ImproveOR_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveAND4_TQ;
-      case 2: return VgT_ImproveAND2_TQ;
-      case 1: return VgT_ImproveAND1_TQ;
-      default: VG_(panic)("get_VgT_ImproveAND_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_Left ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_Left4;
-      case 2: return VgT_Left2;
-      case 1: return VgT_Left1;
-      default: VG_(panic)("get_VgT_Left");
-   }
-}
-
-
-static
-VgTagOp get_VgT_UifU ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_UifU4;
-      case 2: return VgT_UifU2;
-      case 1: return VgT_UifU1;
-      case 0: return VgT_UifU0;
-      default: VG_(panic)("get_VgT_UifU");
-   }
-}
-
-
-static
-VgTagOp get_VgT_DifD ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_DifD4;
-      case 2: return VgT_DifD2;
-      case 1: return VgT_DifD1;
-      default: VG_(panic)("get_VgT_DifD");
-   }
-}
-
-
-static 
-VgTagOp get_VgT_PCast ( Int szs, Int szd )
-{
-   if (szs == 4 && szd == 0) return VgT_PCast40;
-   if (szs == 2 && szd == 0) return VgT_PCast20;
-   if (szs == 1 && szd == 0) return VgT_PCast10;
-   if (szs == 0 && szd == 1) return VgT_PCast01;
-   if (szs == 0 && szd == 2) return VgT_PCast02;
-   if (szs == 0 && szd == 4) return VgT_PCast04;
-   if (szs == 1 && szd == 4) return VgT_PCast14;
-   if (szs == 1 && szd == 2) return VgT_PCast12;
-   if (szs == 1 && szd == 1) return VgT_PCast11;
-   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
-   VG_(panic)("get_VgT_PCast");
-}
-
-
-static 
-VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
-{
-   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
-   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
-
-   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
-   if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
-
-   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
-   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
-
-   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
-   VG_(panic)("get_VgT_Widen");
-}
-
-/* Pessimally cast the spec'd shadow from one size to another. */
-static 
-void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
-{
-   if (szs == 0 && szd == 0)
-      return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_PCast(szs,szd));
-}
-
-
-/* Create a signed or unsigned widen of the spec'd shadow from one
-   size to another.  The only allowed size transitions are 1->2, 1->4
-   and 2->4. */
-static 
-void create_Widen ( UCodeBlock* cb, Bool signed_widen,
-                    Int szs, Int szd, Int tempreg )
-{
-   if (szs == szd) return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_Widen(signed_widen,szs,szd));
-}
-
-
-/* Get the condition codes into a new shadow, at the given size. */
-static
-Int create_GETVF ( UCodeBlock* cb, Int sz )
-{
-   Int tt = newShadow(cb);
-   uInstr1(cb, GETVF, 0, TempReg, tt);
-   create_PCast(cb, 0, sz, tt);
-   return tt;
-}
-
-
-/* Save the condition codes from the spec'd shadow. */
-static
-void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   if (sz == 0) {
-      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
-   } else { 
-      Int tt = newShadow(cb);
-      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
-      create_PCast(cb, sz, 0, tt);
-      uInstr1(cb, PUTVF, 0, TempReg, tt);
-   }
-}
-
-
-/* Do Left on the spec'd shadow. */
-static 
-void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   uInstr3(cb, TAG1, 0, 
-               TempReg, tempreg,
-               NoValue, 0, 
-               Lit16, get_VgT_Left(sz));
-}
-
-
-/* Do UifU on ts and td, putting the result in td. */
-static 
-void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_UifU(sz));
-}
-
-
-/* Do DifD on ts and td, putting the result in td. */
-static 
-void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_DifD(sz));
-}
-
-
-/* Do HelpAND on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveAND_TQ(sz));
-}
-
-
-/* Do HelpOR on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveOR_TQ(sz));
-}
-
-
-/* Get the shadow for an operand described by (tag, val).  Emit code
-   to do this and return the identity of the shadow holding the
-   result.  The result tag is always copied into a new shadow, so it
-   can be modified without trashing the original.*/
-static
-Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, 
-                                     Int sz, Int tag, Int val )
-{
-   Int sh;
-   sh = newShadow(cb);
-   if (tag == TempReg) {
-      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
-      return sh;
-   }
-   if (tag == Literal) {
-      uInstr1(cb, SETV, sz, TempReg, sh);
-      return sh;
-   }
-   if (tag == ArchReg) {
-      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
-      return sh;
-   }
-   VG_(panic)("getOperandShadow");
-}
-
-
-
-/* Create and return an instrumented version of cb_in.  Free cb_in
-   before returning. */
-static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
-{
-   UCodeBlock* cb;
-   Int         i, j;
-   UInstr*     u_in;
-   Int         qs, qd, qt, qtt;
-   cb = VG_(allocCodeBlock)();
-   cb->nextTemp = cb_in->nextTemp;
-
-   for (i = 0; i < cb_in->used; i++) {
-      qs = qd = qt = qtt = INVALID_TEMPREG;
-      u_in = &cb_in->instrs[i];
-
-      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
-
-      /* VG_(ppUInstr)(0, u_in); */
-      switch (u_in->opcode) {
-
-         case NOP:
-            break;
-
-         case INCEIP:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Loads and stores.  Test the V bits for the address.  24
-            Mar 02: since the address is A-checked anyway, there's not
-            really much point in doing the V-check too, unless you
-            think that you might use addresses which are undefined but
-            still addressible.  Hence the optionalisation of the V
-            check.
-
-            The LOADV/STOREV does an addressibility check for the
-            address. */
-
-         case LOAD: 
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            }
-            uInstr2(cb, LOADV, u_in->size, 
-                        TempReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case STORE:
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV,  4, TempReg, SHADOW(u_in->val2));
-               uInstr1(cb, SETV,   4, TempReg, SHADOW(u_in->val2));
-            }
-            uInstr2(cb, STOREV, u_in->size,
-                        TempReg, SHADOW(u_in->val1), 
-                        TempReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Moving stuff around.  Make the V bits follow accordingly,
-            but don't do anything else.  */
-
-         case GET:
-            uInstr2(cb, GETV, u_in->size,
-                        ArchReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUT:
-            uInstr2(cb, PUTV, u_in->size, 
-                        TempReg, SHADOW(u_in->val1),
-                        ArchReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case GETF:
-            /* This is not the smartest way to do it, but should work. */
-            qd = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUTF:
-            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case MOV:
-            switch (u_in->tag1) {
-               case TempReg: 
-                  uInstr2(cb, MOV, 4,
-                              TempReg, SHADOW(u_in->val1),
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               case Literal: 
-                  uInstr1(cb, SETV, u_in->size, 
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               default: 
-                  VG_(panic)("vg_instrument: MOV");
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Special case of add, where one of the operands is a literal.
-            lea1(t) = t + some literal.
-            Therefore: lea1#(qa) = left(qa) 
-         */
-         case LEA1:
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Another form of add.  
-            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
-                                and is 0,1,2 or 3.
-            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
-            Note, subtly, that the shift puts zeroes at the bottom of qt,
-            meaning Valid, since the corresponding shift of tt puts 
-            zeroes at the bottom of tb.
-         */
-         case LEA2: {
-            Int shift;
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            switch (u_in->extra4b) {
-               case 1: shift = 0; break;
-               case 2: shift = 1; break;
-               case 4: shift = 2; break;
-               case 8: shift = 3; break;
-               default: VG_(panic)( "vg_instrument(LEA2)" );
-            }
-            qs = SHADOW(u_in->val1);
-            qt = SHADOW(u_in->val2);
-            qd = SHADOW(u_in->val3);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
-            if (shift > 0) {
-               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
-               uLiteral(cb, shift);
-            }
-            create_UifU(cb, 4, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
-         case INC: case DEC:
-            qd = SHADOW(u_in->val1);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->flags_w != FlagsEmpty)
-               create_PUTVF(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* This is a HACK (approximation :-) */
-         /* rcl#/rcr#(qs,qd) 
-               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
-                 eflags# = q0
-                 qd =pcast-0-sz(q0)
-            Ie, cast everything down to a single bit, then back up.
-            This assumes that any bad bits infect the whole word and 
-            the eflags.
-         */
-         case RCL: case RCR:
-	    vg_assert(u_in->flags_r != FlagsEmpty);
-            /* The following assertion looks like it makes sense, but is
-               actually wrong.  Consider this:
-                  rcll    %eax
-                  imull   %eax, %eax
-               The rcll writes O and C but so does the imull, so the O and C 
-               write of the rcll is annulled by the prior improvement pass.
-               Noticed by Kevin Ryde <user42@zip.com.au>
-            */
-	    /* vg_assert(u_in->flags_w != FlagsEmpty); */
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            /* We can safely modify qs; cast it to 0-size. */
-            create_PCast(cb, u_in->size, 0, qs);
-            qd = SHADOW(u_in->val2);
-            create_PCast(cb, u_in->size, 0, qd);
-            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
-            create_UifU(cb, 0, qs, qd);
-            /* qs is now free; reuse it for the flag definedness. */
-            qs = create_GETVF(cb, 0);
-            create_UifU(cb, 0, qs, qd);
-            create_PUTVF(cb, 0, qd);
-            create_PCast(cb, 0, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* for OP in shl shr sar rol ror
-            (qs is shift count#, qd is value to be OP#d)
-            OP(ts,td)
-            OP#(qs,qd)
-               = pcast-1-sz(qs) `UifU` OP(ts,qd)
-            So we apply OP to the tag bits too, and then UifU with
-            the shift count# to take account of the possibility of it
-            being undefined.
+   /* All regs are dead at the end of the block */
+   rregs_live = ALL_RREGS_DEAD;
             
-            A bit subtle:
-               ROL/ROR rearrange the tag bits as per the value bits.
-               SHL/SHR shifts zeroes into the value, and corresponding 
-                  zeroes indicating Definedness into the tag.
-               SAR copies the top bit of the value downwards, and therefore
-                  SAR also copies the definedness of the top bit too.
-            So in all five cases, we just apply the same op to the tag 
-            bits as is applied to the value bits.  Neat!
-         */
-         case SHL:
-         case SHR: case SAR:
-         case ROL: case ROR: {
-            Int t_amount = INVALID_TEMPREG;
-            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-
-            /* Make qs hold shift-count# and make
-               t_amount be a TempReg holding the shift count. */
-            if (u_in->tag1 == Literal) {
-               t_amount = newTemp(cb);
-               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
-               uLiteral(cb, u_in->lit32);
-               qs = SHADOW(t_amount);
-               uInstr1(cb, SETV, 1, TempReg, qs);
-            } else {
-               t_amount = u_in->val1;
-               qs = SHADOW(u_in->val1);
-            }
-
-            uInstr2(cb, u_in->opcode, 
-                        u_in->size, 
-                        TempReg, t_amount, 
-                        TempReg, qd);
-            qt = newShadow(cb);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            create_PCast(cb, 1, u_in->size, qt);
-            create_UifU(cb, u_in->size, qt, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* One simple tag operation. */
-         case WIDEN:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, 
-                             SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* not#(x) = x (since bitwise independent) */
-         case NOT:
-            vg_assert(u_in->tag1 == TempReg);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* neg#(x) = left(x) (derivable from case for SUB) */
-         case NEG:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Left(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* bswap#(x) = bswap(x) */
-         case BSWAP:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->size == 4);
-            qd = SHADOW(u_in->val1);
-            uInstr1(cb, BSWAP, 4, TempReg, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
-         case CC2VAL:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            qt = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cmov#(qs,qd) = cmov(qs,qd)
-            That is, do the cmov of tags using the same flags as for
-            the data (obviously).  However, first do a test on the 
-            validity of the flags.
-         */
-         case CMOV:
-            vg_assert(u_in->size == 4);
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            vg_assert(u_in->flags_w == FlagsEmpty);
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            qt = create_GETVF(cb, 0);
-            uInstr1(cb, TESTV, 0, TempReg, qt);
-            /* qt should never be referred to again.  Nevertheless
-               ... */
-            uInstr1(cb, SETV, 0, TempReg, qt);
-
-            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
-            LAST_UINSTR(cb).cond    = u_in->cond;
-            LAST_UINSTR(cb).flags_r = u_in->flags_r;
-
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* add#/sub#(qs,qd) 
-               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
-               = left(qs) `UifU` left(qd)
-               = left(qs `UifU` qd)
-            adc#/sbb#(qs,qd)
-               = left(qs `UifU` qd) `UifU` pcast(eflags#)
-            Second arg (dest) is TempReg.
-            First arg (src) is Literal or TempReg or ArchReg. 
-         */
-         case ADD: case SUB:
-         case ADC: case SBB:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->opcode == ADC || u_in->opcode == SBB) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, u_in->size);
-               create_UifU(cb, u_in->size, qt, qd);
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* xor#(qs,qd) = qs `UifU` qd */
-         case XOR:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* and#/or#(qs,qd) 
-               = (qs `UifU` qd) `DifD` improve(vs,qs) 
-                                `DifD` improve(vd,qd)
-            where improve is the relevant one of
-                Improve{AND,OR}_TQ
-            Use the following steps, with qt as a temp:
-               qt = improve(vd,qd)
-               qd = qs `UifU` qd
-               qd = qt `DifD` qd
-               qt = improve(vs,qs)
-               qd = qt `DifD` qd
-         */
-         case AND: case OR:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-            qs = SHADOW(u_in->val1);
-            qt = newShadow(cb);
-
-            /* qt = improve(vd,qd) */
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
-            /* qd = qs `UifU` qd */
-            create_UifU(cb, u_in->size, qs, qd);
-            /* qd = qt `DifD` qd */
-            create_DifD(cb, u_in->size, qt, qd);
-            /* qt = improve(vs,qs) */
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
-            /* qd = qt `DifD` qd */
-               create_DifD(cb, u_in->size, qt, qd);
-            /* So, finally qd is the result tag. */
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Machinery to do with supporting CALLM.  Copy the start and
-            end markers only to make the result easier to read
-            (debug); they generate no code and have no effect. 
-         */
-         case CALLM_S: case CALLM_E:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Copy PUSH and POP verbatim.  Arg/result absval
-            calculations are done when the associated CALL is
-            processed.  CLEAR has no effect on absval calculations but
-            needs to be copied.  
-         */
-         case PUSH: case POP: case CLEAR:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* In short:
-               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
-            We have to decide on a size to do the computation at,
-            although the choice doesn't affect correctness.  We will
-            do a pcast to the final size anyway, so the only important
-            factor is to choose a size which minimises the total
-            number of casts needed.  Valgrind: just use size 0,
-            regardless.  It may not be very good for performance
-            but does simplify matters, mainly by reducing the number
-            of different pessimising casts which have to be implemented.
-         */
-         case CALLM: {
-            UInstr* uu;
-            Bool res_used;
-
-            /* Now generate the code.  Get the final result absval
-               into qt. */
-            qt  = newShadow(cb);
-            qtt = newShadow(cb);
-            uInstr1(cb, SETV, 0, TempReg, qt);
-            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != PUSH) continue;
-               /* cast via a temporary */
-               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
-                                   TempReg, qtt);
-               create_PCast(cb, uu->size, 0, qtt);
-               create_UifU(cb, 0, qtt, qt);
-            }
-            /* Remembering also that flags read count as inputs. */
-            if (u_in->flags_r != FlagsEmpty) {
-               qtt = create_GETVF(cb, 0);
-               create_UifU(cb, 0, qtt, qt);
-            }
-
-            /* qt now holds the result tag.  If any results from the
-               call are used, either by fetching with POP or
-               implicitly by writing the flags, we copy the result
-               absval to the relevant location.  If not used, the call
-               must have been for its side effects, so we test qt here
-               and now.  Note that this assumes that all values
-               removed by POP continue to be live.  So dead args
-               *must* be removed with CLEAR, not by POPping them into
-               a dummy tempreg. 
-            */
-            res_used = False;
-            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != POP) continue;
-               /* Cast via a temp. */
-               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
-               create_PCast(cb, 0, uu->size, qtt);
-               uInstr2(cb, MOV, 4, TempReg, qtt, 
-                                   TempReg, SHADOW(uu->val1));
-               res_used = True;
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, 0, qt);
-               res_used = True;
-            }
-            if (!res_used) {
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-         /* Whew ... */
-
-         case JMP:
-            if (u_in->tag1 == TempReg) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            } else {
-               vg_assert(u_in->tag1 == Literal);
-            }
-            if (u_in->cond != CondAlways) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, 0);
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case JIFZ:
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Emit a check on the address used.  For FPU_R, the value
-            loaded into the FPU is checked at the time it is read from
-            memory (see synth_fpu_mem_check_actions).  */
-         case FPU_R: case FPU_W:
-            vg_assert(u_in->tag2 == TempReg);
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* For FPU insns not referencing memory, just copy thru. */
-         case FPU: 
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         default:
-            VG_(ppUInstr)(0, u_in);
-            VG_(panic)( "vg_instrument: unhandled case");
-
-      } /* end of switch (u_in->opcode) */
-
-   } /* end of for loop */
-
-   VG_(freeCodeBlock)(cb_in);
-   return cb;
-}
-
-/*------------------------------------------------------------*/
-/*--- Clean up mem check instrumentation.                  ---*/
-/*------------------------------------------------------------*/
-
-#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
-#define VGC_UNDEF ((UChar)100)
-#define VGC_VALUE ((UChar)101)
-
-#define NOP_no_msg(uu)                                         \
-   do { uu->opcode = NOP; } while (False)
-
-#define NOP_tag1_op(uu)                                        \
-   do { uu->opcode = NOP;                                      \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: delete %s due to defd arg\n",   \
-                       i, VG_(nameOfTagOp(u->val3)));          \
-   } while (False)
-
-#define SETV_tag1_op(uu,newsz)                                 \
-   do { uu->opcode = SETV;                                     \
-        uu->size = newsz;                                      \
-        uu->tag2 = uu->tag3 = NoValue;                         \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: convert %s to SETV%d "          \
-                       "due to defd arg\n",                    \
-                       i, VG_(nameOfTagOp(u->val3)), newsz);   \
-   } while (False)
-
-
-
-/* Run backwards and delete SETVs on shadow temps for which the next
-   action is a write.  Needs an env saying whether or not the next
-   action is a write.  The supplied UCodeBlock is destructively
-   modified.
-*/
-static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
-{
-   Bool*   next_is_write;
-   Int     i, j, k, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
-
-   for (i = 0; i < n_temps; i++) next_is_write[i] = True;
-
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      /* If we're not checking address V bits, there will be a lot of
-         GETVs, TAG1s and TAG2s calculating values which are never
-         used.  These first three cases get rid of them. */
+      u->regs_live_after = rregs_live;
 
-      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete GETV\n", i);
-      } else
+      k = VG_(getRegUsage)(u, RealReg, regUse);
 
-      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) 
-                            && next_is_write[u->val1]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG1\n", i);
-      } else
-
-      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG2\n", i);
-      } else
-
-      /* We do the rest of these regardless of whether or not
-         addresses are V-checked. */
-
-      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) 
-                           && next_is_write[u->val2]) {
-         /* This MOV is pointless because the target is dead at this
-            point.  Delete it. */
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete MOV\n", i);
-      } else
-
-      if (u->opcode == SETV) {
-         if (u->tag1 == TempReg) {
-            vg_assert(VGC_IS_SHADOW(u->val1));
-            if (next_is_write[u->val1]) {
-               /* This write is pointless, so annul it. */
-               u->opcode = NOP;
-               u->size = 0;
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete SETV\n", i);
-            } else {
-               /* This write has a purpose; don't annul it, but do
-                  notice that we did it. */
-               next_is_write[u->val1] = True;
-            }
-              
-         }
-
-      } else {
-         /* Find out what this insn does to the temps. */
-         k = getTempUsage(u, &tempUse[0]);
-         vg_assert(k <= 3);
-         for (j = k-1; j >= 0; j--) {
-            next_is_write[ tempUse[j].tempNo ]
-                         = tempUse[j].isWrite;
-         }
-      }
-
-   }
-
-   VG_(jitfree)(next_is_write);
-}
-
-
-/* Run forwards, propagating and using the is-completely-defined
-   property.  This removes a lot of redundant tag-munging code.
-   Unfortunately it requires intimate knowledge of how each uinstr and
-   tagop modifies its arguments.  This duplicates knowledge of uinstr
-   tempreg uses embodied in getTempUsage(), which is unfortunate. 
-   The supplied UCodeBlock* is modified in-place.
-
-   For each value temp, def[] should hold VGC_VALUE.
-
-   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
-   definitely known to be fully defined at that size.  In all other
-   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
-   undefined.  In cases of doubt, VGC_UNDEF is always safe.
-*/
-static void vg_propagate_definedness ( UCodeBlock* cb )
-{
-   UChar*  def;
-   Int     i, j, k, t, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
-   for (i = 0; i < n_temps; i++) 
-      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
-
-   /* Run forwards, detecting and using the all-defined property. */
-
-   for (i = 0; i < cb->used; i++) {
-      u = &cb->instrs[i];
-      switch (u->opcode) {
-
-      /* Tag-handling uinstrs. */
-
-         /* Deal with these quickly. */
-         case NOP:
-         case INCEIP:
-            break;
-
-         /* Make a tag defined. */
-         case SETV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            def[u->val1] = u->size;
-            break;
-
-         /* Check definedness of a tag. */
-         case TESTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) { 
-               vg_assert(def[u->val1] == u->size); 
-               NOP_no_msg(u);
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete TESTV on defd arg\n", i);
-            }
-            break;
-
-         /* Applies to both values and tags.  Propagate Definedness
-            property through copies.  Note that this isn't optional;
-            we *have* to do this to keep def[] correct. */
-         case MOV:
-            vg_assert(u->tag2 == TempReg);
-            if (u->tag1 == TempReg) {
-               if (VGC_IS_SHADOW(u->val1)) {
-                  vg_assert(VGC_IS_SHADOW(u->val2));
-                  def[u->val2] = def[u->val1];
-               }
-            }
-            break;
-
-         case PUTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(PUTV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into PUTV\n", i);
-            }
-            break;
-
-         case STOREV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(STOREV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into STandV\n", i);
-            }
-            break;
-
-         /* Nothing interesting we can do with this, I think. */
-         case PUTVF:
-            break;
-
-         /* Tag handling operations. */
-         case TAG2:
-            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-            vg_assert(u->tag3 == Lit16);
-            /* Ultra-paranoid "type" checking. */
-            switch (u->val3) {
-               case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
-               case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
-               case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
-                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
-                  break;
-               default:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  break;
-            }
-            switch (u->val3) {
-               Int sz;
-               case VgT_UifU4: 
-                  sz = 4; goto do_UifU;
-               case VgT_UifU2: 
-                  sz = 2; goto do_UifU;
-               case VgT_UifU1:
-                  sz = 1; goto do_UifU;
-               case VgT_UifU0:
-                  sz = 0; goto do_UifU;
-               do_UifU:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-                  if (def[u->val1] <= 4) {
-                     /* UifU.  The first arg is defined, so result is
-                        simply second arg.  Delete this operation. */
-                     vg_assert(def[u->val1] == sz);
-                     NOP_no_msg(u);
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: delete UifU%d due to defd arg1\n", 
-                           i, sz);
-                  }
-                  else 
-                  if (def[u->val2] <= 4) {
-                     /* UifU.  The second arg is defined, so result is
-                        simply first arg.  Copy to second. */
-                     vg_assert(def[u->val2] == sz);
-                     u->opcode = MOV; 
-                     u->size = 4;
-                     u->tag3 = NoValue;
-                     def[u->val2] = def[u->val1];
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: change UifU%d to MOV due to defd"
-                           " arg2\n", 
-                           i, sz);
-                  }
-                  break;
-               case VgT_ImproveAND4_TQ:
-                  sz = 4; goto do_ImproveAND;
-               case VgT_ImproveAND1_TQ:
-                  sz = 1; goto do_ImproveAND;
-               do_ImproveAND:
-                  /* Implements Q = T OR Q.  So if Q is entirely defined,
-                     ie all 0s, we get MOV T, Q. */
-		  if (def[u->val2] <= 4) {
-                     vg_assert(def[u->val2] == sz);
-                     u->size = 4; /* Regardless of sz */
-                     u->opcode = MOV;
-                     u->tag3 = NoValue;
-                     def[u->val2] = VGC_UNDEF;
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                            "at %d: change ImproveAND%d_TQ to MOV due "
-                            "to defd arg2\n", 
-                            i, sz);
-                  }
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            break;
-
-         case TAG1:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] > 4) break;
-            /* We now know that the arg to the op is entirely defined.
-               If the op changes the size of the arg, we must replace
-               it with a SETV at the new size.  If it doesn't change
-               the size, we can delete it completely. */
-            switch (u->val3) {
-               /* Maintain the same size ... */
-               case VgT_Left4: 
-                  vg_assert(def[u->val1] == 4);
-                  NOP_tag1_op(u);
-                  break;
-               case VgT_PCast11: 
-                  vg_assert(def[u->val1] == 1);
-                  NOP_tag1_op(u);
-                  break;
-               /* Change size ... */
-               case VgT_PCast40: 
-                  vg_assert(def[u->val1] == 4);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast14: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,4);
-                  def[u->val1] = 4;
-                  break;
-               case VgT_PCast12: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               case VgT_PCast10: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast02: 
-                  vg_assert(def[u->val1] == 0);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            if (VG_(disassemble)) 
-               VG_(printf)(
-                  "at %d: delete TAG1 %s due to defd arg\n",
-                  i, VG_(nameOfTagOp(u->val3)));
-            break;
-
-         default:
-         unhandled:
-            /* We don't know how to handle this uinstr.  Be safe, and 
-               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
-            k = getTempUsage(u, &tempUse[0]);
-            vg_assert(k <= 3);
-            for (j = 0; j < k; j++) {
-               t = tempUse[j].tempNo;
-               vg_assert(t >= 0 && t < n_temps);
-               if (!tempUse[j].isWrite) {
-                  /* t is read; ignore it. */
-                  if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
-                     VG_(printf)("ignoring def %d at %s %s\n", 
-                                 def[t], 
-                                 VG_(nameUOpcode)(True, u->opcode),
-                                 (u->opcode == TAG1 || u->opcode == TAG2)
-                                    ? VG_(nameOfTagOp)(u->val3) 
-                                    : (Char*)"");
-               } else {
-                  /* t is written; better nullify it. */
-                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
-               }
-            }
+      /* For each reg usage ... bwds in program order.  Variable is live
+         before this UInstr if it is read by this UInstr.
+         Note that regUse[j].num holds the Intel reg number, so we must
+         convert it to our rank number.  */
+      for (j = k-1; j >= 0; j--) {
+         SET_RREG_LIVENESS ( VG_(realRegNumToRank)(regUse[j].num),
+                             rregs_live,
+                             !regUse[j].isWrite );
       }
    }
-
-   VG_(jitfree)(def);
 }
 
-
-/* Top level post-instrumentation cleanup function. */
-static void vg_cleanup ( UCodeBlock* cb )
-{
-   vg_propagate_definedness ( cb );
-   vg_delete_redundant_SETVs ( cb );
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Main entry point for the JITter.                     ---*/
 /*------------------------------------------------------------*/
@@ -3068,13 +1964,14 @@
    this call is being done for debugging purposes, in which case (a)
    throw away the translation once it is made, and (b) produce a load
    of debugging output. 
+
+   'tst' is the identity of the thread needing this block.
 */
-void VG_(translate) ( ThreadState* tst, 
-                         /* Identity of thread needing this block */
-                      Addr  orig_addr,
-                      UInt* orig_size,
-                      Addr* trans_addr,
-                      UInt* trans_size )
+void VG_(translate) ( /*IN*/  ThreadState* tst, 
+		      /*IN*/  Addr  orig_addr,  
+                      /*OUT*/ UInt* orig_size,
+                      /*OUT*/ Addr* trans_addr, 
+                      /*OUT*/ UInt* trans_size )
 {
    Int         n_disassembled_bytes, final_code_size;
    Bool        debugging_translation;
@@ -3085,109 +1982,82 @@
    debugging_translation
       = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
 
-   dis = True;
-   dis = debugging_translation;
+   if (!debugging_translation)
+      VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
 
-   /* Check if we're being asked to jump to a silly address, and if so
-      record an error message before potentially crashing the entire
-      system. */
-   if (VG_(clo_instrument) && !debugging_translation && !dis) {
-      Addr bad_addr;
-      Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
-      if (!ok) {
-         VG_(record_jump_error)(tst, bad_addr);
-      }
-   }
-
-   /* if (VG_(overall_in_count) >= 4800) dis=True; */
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-   if (0 || dis 
-       || (VG_(overall_in_count) > 0 &&
-           (VG_(overall_in_count) % 1000 == 0))) {
-      if (0&& (VG_(clo_verbosity) > 1 || dis))
-         VG_(message)(Vg_UserMsg,
-              "trans# %d, bb# %lu, in %d, out %d",
-              VG_(overall_in_count), 
-              VG_(bbs_done),
-              VG_(overall_in_osize), VG_(overall_in_tsize),
-              orig_addr );
-   }
    cb = VG_(allocCodeBlock)();
 
+   /* If doing any code printing, print a basic block start marker */
+   if (VG_(clo_trace_codegen)) {
+      Char fnname[64] = "";
+      VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
+      VG_(printf)(
+              "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %lu ====\n\n",
+              VG_(overall_in_count), fnname, orig_addr, 
+              VG_(overall_in_osize), VG_(overall_in_tsize),
+              VG_(bbs_done));
+   }
+
+   /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
+#  define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
+      ( debugging_translation || (VG_(clo_trace_codegen) & (1 << (n-1))) )
+
    /* Disassemble this basic block into cb. */
-   /* VGP_PUSHCC(VgpToUCode); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
+   VGP_PUSHCC(VgpToUCode);
    n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
-   /* VGP_POPCC; */
-   /* dis=True; */
-   /* if (0&& VG_(translations_done) < 617)  */
-   /*    dis=False; */
+   VGP_POPCC(VgpToUCode);
+
    /* Try and improve the code a bit. */
    if (VG_(clo_optimise)) {
-      /* VGP_PUSHCC(VgpImprove); */
+      VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
+      VGP_PUSHCC(VgpImprove);
       vg_improve ( cb );
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Improved code:" );
-      /* VGP_POPCC; */
-   }
-   /* dis=False; */
-   /* Add instrumentation code. */
-   if (VG_(clo_instrument)) {
-      /* VGP_PUSHCC(VgpInstrument); */
-      cb = vg_instrument(cb);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
-      if (VG_(clo_cleanup)) {
-         /* VGP_PUSHCC(VgpCleanup); */
-         vg_cleanup(cb);
-         /* VGP_POPCC; */
-         if (VG_(disassemble)) 
-            VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
-      }
+      VGP_POPCC(VgpImprove);
    }
 
-   //VG_(disassemble) = True;
+   /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
+      SK_(instrument) looks at it. */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
+   VGP_PUSHCC(VgpInstrument);
+   cb = SK_(instrument) ( cb, orig_addr );
+   if (VG_(print_codegen))
+      VG_(ppUCodeBlock) ( cb, "Instrumented UCode:" );
+   VG_(saneUCodeBlock)( cb );
+   VGP_POPCC(VgpInstrument);
 
-   /* Add cache simulation code. */
-   if (VG_(clo_cachesim)) {
-      /* VGP_PUSHCC(VgpCacheInstrument); */
-      cb = VG_(cachesim_instrument)(cb, orig_addr);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
-   }
-   
-   //VG_(disassemble) = False;
-   
    /* Allocate registers. */
-   /* VGP_PUSHCC(VgpRegAlloc); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
+   VGP_PUSHCC(VgpRegAlloc);
    cb = vg_do_register_allocation ( cb );
-   /* VGP_POPCC; */
-   /* dis=False; */
-   /* 
-   if (VG_(disassemble))
-      VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
-   */
+   VGP_POPCC(VgpRegAlloc);
 
-   /* VGP_PUSHCC(VgpFromUcode); */
-   /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
-      and so must be VG_(jitfree)'d. */
+   /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
+    * anything;  results can be seen when emitting final code). */
+   VGP_PUSHCC(VgpLiveness);
+   vg_realreg_liveness_analysis ( cb );
+   VGP_POPCC(VgpLiveness);
+
+   /* Emit final code */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
+
+   VGP_PUSHCC(VgpFromUcode);
    final_code = VG_(emit_code)(cb, &final_code_size );
-   /* VGP_POPCC; */
+   VGP_POPCC(VgpFromUcode);
    VG_(freeCodeBlock)(cb);
 
+#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
+
    if (debugging_translation) {
       /* Only done for debugging -- throw away final result. */
-      VG_(jitfree)(final_code);
+      VG_(arena_free)(VG_AR_JITTER, final_code);
    } else {
       /* Doing it for real -- return values to caller. */
-      //VG_(printf)("%d %d\n", n_disassembled_bytes, final_code_size);
       *orig_size = n_disassembled_bytes;
       *trans_addr = (Addr)final_code;
       *trans_size = final_code_size;
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpTranslate);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index a6e15b3..09e8fa2 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 /* #define DEBUG_TRANSTAB */
 
@@ -319,14 +318,13 @@
          vg_tt_used, vg_tc_used / 1000
       );
 
-   /* Reconstruct the SMC detection structures. */
 #  ifdef DEBUG_TRANSTAB
    for (i = 0; i < VG_TT_SIZE; i++)
       vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
 #  endif
    VG_(sanity_check_tc_tt)();
 
-   VGP_POPCC;
+   VGP_POPCC(VgpDoLRU);
 }
 
 
@@ -460,7 +458,7 @@
    if (tte == NULL) {
       /* We didn't find it.  vg_run_innerloop will have to request a
          translation. */
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return (Addr)0;
    } else {
       /* Found it.  Put the search result into the fast cache now.
@@ -469,7 +467,7 @@
       VG_(tt_fast)[cno] = (Addr)tte;
       VG_(tt_fast_misses)++;
       tte->mru_epoch = VG_(current_epoch);
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return tte->trans_addr;
    }
 }
@@ -498,8 +496,11 @@
       o_end = o_start + vg_tt[i].orig_size - 1;
       if (o_end < i_start || o_start > i_end)
          continue;
-      if (VG_(clo_cachesim))
-         VG_(cachesim_notify_discard)( & vg_tt[i] );
+
+      if (VG_(needs).basic_block_discards)
+         SK_(discard_basic_block_info)( vg_tt[i].orig_addr, 
+                                         vg_tt[i].orig_size );
+
       vg_tt[i].orig_addr = VG_TTE_DELETED;
       VG_(this_epoch_out_count) ++;
       VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
diff --git a/coregrind/vg_unsafe.h b/coregrind/vg_unsafe.h
index 0f72646..0862e0e 100644
--- a/coregrind/vg_unsafe.h
+++ b/coregrind/vg_unsafe.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/coregrind/vg_valgrinq_dummy.c b/coregrind/vg_valgrinq_dummy.c
index a0b1441..332085a 100644
--- a/coregrind/vg_valgrinq_dummy.c
+++ b/coregrind/vg_valgrinq_dummy.c
@@ -26,11 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* For the rationale behind this file, look at
-   VG_(mash_LD_PRELOAD_string) in vg_main.c. */
+   VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) in vg_main.c. */
 
 /* Remember not to use a variable of this name in any program you want
    to debug :-) */