Files updated, added and removed in order to turn the ERASER branch into HEAD


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1086 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/.cvsignore b/.cvsignore
index 6e4e554..3a1dd73 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -11,3 +11,8 @@
 vg_annotate
 vg_cachegen
 default.supp
+bin
+lib
+include
+share
+cachegrind.out.*
diff --git a/Makefile.am b/Makefile.am
index 60553dd..96911ed 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/README b/README
index f2dd536..d065b4a 100644
--- a/README
+++ b/README
@@ -51,7 +51,7 @@
 least.
 
 Valgrind is licensed under the GNU General Public License, version 2. 
-Read the file LICENSE in the source distribution for details.
+Read the file COPYING in the source distribution for details.
 
 
 Documentation
diff --git a/addrcheck/Makefile.am b/addrcheck/Makefile.am
index 60553dd..96911ed 100644
--- a/addrcheck/Makefile.am
+++ b/addrcheck/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/addrcheck/ac_include.h b/addrcheck/ac_include.h
new file mode 100644
index 0000000..ef6b147
--- /dev/null
+++ b/addrcheck/ac_include.h
@@ -0,0 +1,120 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A header file for the AddrCheck skin.                        ---*/
+/*---                                       vg_addrcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_ADDRCHECK_INCLUDE_H
+#define __VG_ADDRCHECK_INCLUDE_H
+
+#include "vg_skin.h"
+
+
+/* The classification of a faulting address. */
+typedef 
+   enum { Undescribed, /* as-yet unclassified */
+          Stack, 
+          Unknown, /* classification yielded nothing useful */
+          Freed, Mallocd
+   }
+   AcAddrKind;
+
+/* Records info about a faulting address. */
+typedef
+   struct {
+      /* ALL */
+      AcAddrKind akind;
+      /* Freed, Mallocd */
+      Int blksize;
+      /* Freed, Mallocd */
+      Int rwoffset;
+      /* Freed, Mallocd */
+      ExeContext* lastchange;
+      /* Stack */
+      ThreadId stack_tid;
+      /* True if is just-below %esp -- could be a gcc bug. */
+      Bool maybe_gcc;
+   }
+   AcAddrInfo;
+
+
+/*------------------------------------------------------------*/
+/*--- Skin-specific command line options + defaults        ---*/
+/*------------------------------------------------------------*/
+
+/* Allow loads from partially-valid addresses?  default: YES */
+extern Bool SK_(clo_partial_loads_ok);
+
+/* Max volume of the freed blocks queue. */
+extern Int SK_(clo_freelist_vol);
+
+/* Do leak check at exit?  default: NO */
+extern Bool SK_(clo_leak_check);
+
+/* How closely should we compare ExeContexts in leak records? default: 2 */
+extern VgRes SK_(clo_leak_resolution);
+
+/* In leak check, show reachable-but-not-freed blocks?  default: NO */
+extern Bool SK_(clo_show_reachable);
+
+/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
+ * default: NO*/
+extern Bool SK_(clo_workaround_gcc296_bugs);
+
+
+/*------------------------------------------------------------*/
+/*--- Functions                                            ---*/
+/*------------------------------------------------------------*/
+
+// SSS: work out a consistent prefix convention here
+
+/* Functions defined in vg_addrcheck.c */
+extern void SK_(helperc_ACCESS4) ( Addr );
+extern void SK_(helperc_ACCESS2) ( Addr );
+extern void SK_(helperc_ACCESS1) ( Addr );
+   
+extern void SK_(fpu_ACCESS_check) ( Addr addr, Int size );
+
+extern ShadowChunk* SK_(any_matching_freed_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ) );
+
+/* For client requests */
+extern void SK_(make_noaccess) ( Addr a, UInt len );
+extern void SK_(make_accessible) ( Addr a, UInt len );
+
+extern Bool SK_(check_accessible) ( Addr a, UInt len, Addr* bad_addr );
+
+extern void SK_(detect_memory_leaks) ( void );
+
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   vg_addrcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
new file mode 100644
index 0000000..a8d9075
--- /dev/null
+++ b/addrcheck/ac_main.c
@@ -0,0 +1,2587 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The AddrCheck skin: like MemCheck, but only does address     ---*/
+/*--- checking.  No definedness checking.                          ---*/
+/*---                                               vg_addrcheck.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_addrcheck_include.h"
+//#include "vg_profile.c"
+
+/*------------------------------------------------------------*/
+/*--- Defns                                                ---*/
+/*------------------------------------------------------------*/
+
+/* These many bytes below %ESP are considered addressible if we're
+   doing the --workaround-gcc296-bugs hack. */
+#define VG_GCC296_BUG_STACK_SLOP 1024
+
+
+typedef 
+   enum { 
+      /* Bad syscall params */
+      ParamSupp,
+      /* Memory errors in core (pthread ops, signal handling) */
+      CoreMemSupp,
+      /* Invalid read/write attempt at given size */
+      Addr1Supp, Addr2Supp, Addr4Supp, Addr8Supp,
+      /* Invalid or mismatching free */
+      FreeSupp
+   } 
+   AddrCheckSuppKind;
+
+/* What kind of error it is. */
+typedef 
+   enum { CoreMemErr,
+          AddrErr, 
+          ParamErr, UserErr,  /* behaves like an anonymous ParamErr */
+          FreeErr, FreeMismatchErr
+   }
+   AddrCheckErrorKind;
+
+/* What kind of memory access is involved in the error? */
+typedef
+   enum { ReadAxs, WriteAxs, ExecAxs }
+   AxsKind;
+
+/* Extra context for memory errors */
+typedef
+   struct {
+      /* AddrErr */
+      AxsKind axskind;
+      /* AddrErr */
+      Int size;
+      /* AddrErr, FreeErr, FreeMismatchErr, ParamErr, UserErr */
+      AcAddrInfo addrinfo;
+      /* ParamErr, UserErr, CoreMemErr */
+      Bool isWrite;
+   }
+   AddrCheckError;
+
+/*------------------------------------------------------------*/
+/*--- Comparing and printing errors                        ---*/
+/*------------------------------------------------------------*/
+
+static __inline__
+void clear_AcAddrInfo ( AcAddrInfo* ai )
+{
+   ai->akind      = Unknown;
+   ai->blksize    = 0;
+   ai->rwoffset   = 0;
+   ai->lastchange = NULL;
+   ai->stack_tid  = VG_INVALID_THREADID;
+   ai->maybe_gcc  = False;
+}
+
+static __inline__
+void clear_AddrCheckError ( AddrCheckError* err_extra )
+{
+   err_extra->axskind   = ReadAxs;
+   err_extra->size      = 0;
+   clear_AcAddrInfo ( &err_extra->addrinfo );
+   err_extra->isWrite   = False;
+}
+
+__attribute__((unused))
+static Bool eq_AcAddrInfo ( VgRes res, AcAddrInfo* ai1, AcAddrInfo* ai2 )
+{
+   if (ai1->akind != Undescribed 
+       && ai2->akind != Undescribed
+       && ai1->akind != ai2->akind) 
+      return False;
+   if (ai1->akind == Freed || ai1->akind == Mallocd) {
+      if (ai1->blksize != ai2->blksize)
+         return False;
+      if (!VG_(eq_ExeContext)(res, ai1->lastchange, ai2->lastchange))
+         return False;
+   }
+   return True;
+}
+
+/* Compare error contexts, to detect duplicates.  Note that if they
+   are otherwise the same, the faulting addrs and associated rwoffsets
+   are allowed to be different.  */
+
+Bool SK_(eq_SkinError) ( VgRes res,
+                         SkinError* e1, SkinError* e2 )
+{
+   AddrCheckError* e1_extra = e1->extra;
+   AddrCheckError* e2_extra = e2->extra;
+   
+   switch (e1->ekind) {
+      case CoreMemErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)   return False;
+         if (e2->ekind != CoreMemErr)                  return False; 
+         if (e1->string == e2->string)                 return True;
+         if (0 == VG_(strcmp)(e1->string, e2->string)) return True;
+         return False;
+
+      case UserErr:
+      case ParamErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)
+            return False;
+         if (e1->ekind == ParamErr 
+             && 0 != VG_(strcmp)(e1->string, e2->string))
+            return False;
+         return True;
+
+      case FreeErr:
+      case FreeMismatchErr:
+         /* JRS 2002-Aug-26: comparing addrs seems overkill and can
+            cause excessive duplication of errors.  Not even AddrErr
+            below does that.  So don't compare either the .addr field
+            or the .addrinfo fields. */
+         /* if (e1->addr != e2->addr) return False; */
+         /* if (!eq_AcAddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+               return False;
+         */
+         return True;
+
+      case AddrErr:
+         /* if (e1_extra->axskind != e2_extra->axskind) return False; */
+         if (e1_extra->size != e2_extra->size) return False;
+         /*
+         if (!eq_AcAddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+            return False;
+         */
+         return True;
+
+      default: 
+         VG_(printf)("Error:\n  unknown AddrCheck error code %d\n", e1->ekind);
+         VG_(panic)("unknown error code in SK_(eq_SkinError)");
+   }
+}
+
+static void pp_AcAddrInfo ( Addr a, AcAddrInfo* ai )
+{
+   switch (ai->akind) {
+      case Stack: 
+         VG_(message)(Vg_UserMsg, 
+                      "   Address 0x%x is on thread %d's stack", 
+                      a, ai->stack_tid);
+         break;
+      case Unknown:
+         if (ai->maybe_gcc) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
+               a);
+            VG_(message)(Vg_UserMsg, 
+               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
+         }
+         break;
+      case Freed: case Mallocd: {
+         UInt delta;
+         UChar* relative;
+         if (ai->rwoffset < 0) {
+            delta    = (UInt)(- ai->rwoffset);
+            relative = "before";
+         } else if (ai->rwoffset >= ai->blksize) {
+            delta    = ai->rwoffset - ai->blksize;
+            relative = "after";
+         } else {
+            delta    = ai->rwoffset;
+            relative = "inside";
+         }
+         {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a block of size %d %s",
+               a, delta, relative, 
+               ai->blksize,
+               ai->akind==Mallocd ? "alloc'd" 
+                  : ai->akind==Freed ? "free'd" 
+                                     : "client-defined");
+         }
+         VG_(pp_ExeContext)(ai->lastchange);
+         break;
+      }
+      default:
+         VG_(panic)("pp_AcAddrInfo");
+   }
+}
+
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   AddrCheckError* err_extra = err->extra;
+
+   switch (err->ekind) {
+      case CoreMemErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         }
+         pp_ExeContext();
+         break;
+      
+      case AddrErr:
+         switch (err_extra->axskind) {
+            case ReadAxs:
+            case WriteAxs:
+               /* These two aren't actually differentiated ever. */
+               VG_(message)(Vg_UserMsg, "Invalid memory access of size %d", 
+                                        err_extra->size ); 
+               break;
+            case ExecAxs:
+               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
+                                        "stated on the next line");
+               break;
+            default: 
+               VG_(panic)("pp_SkinError(axskind)");
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case FreeErr:
+         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
+         /* fall through */
+      case FreeMismatchErr:
+         if (err->ekind == FreeMismatchErr)
+            VG_(message)(Vg_UserMsg, 
+                         "Mismatched free() / delete / delete []");
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case ParamErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Syscall param %s contains unaddressable byte(s)",
+                err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "Syscall param %s contains uninitialised or "
+                "unaddressable byte(s)",
+            err->string);
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case UserErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Unaddressable byte(s) found during client check request");
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "Uninitialised or "
+               "unaddressable byte(s) found during client check request");
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      default: 
+         VG_(printf)("Error:\n  unknown AddrCheck error code %d\n", err->ekind);
+         VG_(panic)("unknown error code in SK_(pp_SkinError)");
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Recording errors                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Describe an address as best you can, for error messages,
+   putting the result in ai. */
+
+static void describe_addr ( Addr a, AcAddrInfo* ai )
+{
+   ShadowChunk* sc;
+   ThreadId     tid;
+
+   /* Nested functions, yeah.  Need the lexical scoping of 'a'. */ 
+
+   /* Closure for searching thread stacks */
+   Bool addr_is_in_bounds(Addr stack_min, Addr stack_max)
+   {
+      return (stack_min <= a && a <= stack_max);
+   }
+   /* Closure for searching malloc'd and free'd lists */
+   Bool addr_is_in_block(ShadowChunk *sh_ch)
+   {
+      return VG_(addr_is_in_block) ( a, sh_ch->data, sh_ch->size );
+   }
+   /* Perhaps it's on a thread's stack? */
+   tid = VG_(any_matching_thread_stack)(addr_is_in_bounds);
+   if (tid != VG_INVALID_THREADID) {
+      ai->akind     = Stack;
+      ai->stack_tid = tid;
+      return;
+   }
+   /* Search for a recently freed block which might bracket it. */
+   sc = SK_(any_matching_freed_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Freed;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   }
+   /* Search for a currently malloc'd block which might bracket it. */
+   sc = VG_(any_matching_mallocd_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Mallocd;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   } 
+   /* Clueless ... */
+   ai->akind = Unknown;
+   return;
+}
+
+
+/* Creates a copy of the err_extra, updates the copy with address info if
+   necessary, sticks the copy into the SkinError. */
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   AddrCheckError* err_extra;
+
+   err_extra  = VG_(malloc)(sizeof(AddrCheckError));
+   *err_extra = *((AddrCheckError*)err->extra);
+
+   if (err_extra->addrinfo.akind == Undescribed)
+      describe_addr ( err->addr, &(err_extra->addrinfo) );
+
+   err->extra = err_extra;
+}
+
+/* Is this address within some small distance below %ESP?  Used only
+   for the --workaround-gcc296-bugs kludge. */
+Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
+{
+   if ((UInt)esp > (UInt)aa
+       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
+      return True;
+   else
+      return False;
+}
+
+static
+void sk_record_address_error ( Addr a, Int size, Bool isWrite )
+{
+   AddrCheckError err_extra;
+   Bool           just_below_esp;
+
+   just_below_esp 
+      = VG_(is_just_below_ESP)( VG_(get_stack_pointer)(), a );
+
+   /* If this is caused by an access immediately below %ESP, and the
+      user asks nicely, we just ignore it. */
+   if (SK_(clo_workaround_gcc296_bugs) && just_below_esp)
+      return;
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.axskind = isWrite ? WriteAxs : ReadAxs;
+   err_extra.size    = size;
+   err_extra.addrinfo.akind     = Undescribed;
+   err_extra.addrinfo.maybe_gcc = just_below_esp;
+   VG_(maybe_record_error)( NULL, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+/* These ones are called from non-generated code */
+
+/* This is for memory errors in pthread functions, as opposed to pthread API
+   errors which are found by the core. */
+void SK_(record_core_mem_error) ( ThreadState* tst, Bool isWrite, Char* msg )
+{
+   AddrCheckError err_extra;
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, CoreMemErr, /*addr*/0, msg, &err_extra );
+}
+
+void SK_(record_param_error) ( ThreadState* tst, Addr a, Bool isWrite, 
+                               Char* msg )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, ParamErr, a, msg, &err_extra );
+}
+
+void SK_(record_jump_error) ( ThreadState* tst, Addr a )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.axskind = ExecAxs;
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_free_error) ( ThreadState* tst, Addr a ) 
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_freemismatch_error) ( ThreadState* tst, Addr a )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeMismatchErr, a, /*s*/NULL, &err_extra );
+}
+
+/* Record an error flagged by a client request (user-defined check)
+   at address `a'. */
+void SK_(record_user_error) ( ThreadState* tst, Addr a, Bool isWrite )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite        = isWrite;
+   VG_(maybe_record_error)( tst, UserErr, a, /*s*/NULL, &err_extra );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Suppressions                                         ---*/
+/*------------------------------------------------------------*/
+
+#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
+                      && VG_(strcmp)((s1),(s2))==0)
+
+/* Map a suppression-kind name from a suppressions file onto our
+   SuppKind enum.  Returns False for names this skin doesn't know. */
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if      (STREQ(name, "Param"))   *skind = ParamSupp;
+   else if (STREQ(name, "CoreMem")) *skind = CoreMemSupp;
+   else if (STREQ(name, "Addr1"))   *skind = Addr1Supp;
+   else if (STREQ(name, "Addr2"))   *skind = Addr2Supp;
+   else if (STREQ(name, "Addr4"))   *skind = Addr4Supp;
+   else if (STREQ(name, "Addr8"))   *skind = Addr8Supp;
+   else 
+      return False;
+
+   return True;
+}
+
+/* Read any skin-specific extra lines for suppression `s' from fd.
+   Only Param suppressions carry an extra line (the parameter name,
+   kept as a strdup'd copy).  Returns False on premature EOF. */
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, 
+                                         SkinSupp *s )
+{
+   Bool eof;
+
+   if (s->skind == ParamSupp) {
+      eof = VG_(getLine) ( fd, buf, nBuf );
+      if (eof) return False;
+      s->string = VG_(strdup)(buf);
+   }
+   return True;
+}
+
+extern Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   UInt su_size;
+   AddrCheckError* err_extra = err->extra;
+
+   switch (su->skind) {
+      case ParamSupp:
+         return (err->ekind == ParamErr && STREQ(su->string, err->string));
+
+      case CoreMemSupp:
+         return (err->ekind == CoreMemErr && STREQ(su->string, err->string));
+
+      case Addr1Supp: su_size = 1; goto addr_case;
+      case Addr2Supp: su_size = 2; goto addr_case;
+      case Addr4Supp: su_size = 4; goto addr_case;
+      case Addr8Supp: su_size = 8; goto addr_case;
+      addr_case:
+         return (err->ekind == AddrErr && err_extra->size != su_size);
+
+      case FreeSupp:
+         return (err->ekind == FreeErr || err->ekind == FreeMismatchErr);
+
+      default:
+         VG_(printf)("Error:\n"
+                     "  unknown AddrCheck suppression type %d\n", su->skind);
+         VG_(panic)("unknown suppression type in "
+                    "SK_(error_matches_suppression)");
+   }
+}
+
+#  undef STREQ
+
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the AddrCheck skin: Maintain bitmaps of memory,      ---*/
+/*--- tracking the accessibility (A) of each byte.                 ---*/
+/*--------------------------------------------------------------------*/
+
+#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
+
+/*------------------------------------------------------------*/
+/*--- Command line options                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Tolerate partially-valid aligned 4-byte loads (see ACCESS4_SLOWLY). */
+Bool  SK_(clo_partial_loads_ok)       = True;
+/* Volume (in bytes) of freed blocks queued before really recycling. */
+Int   SK_(clo_freelist_vol)           = 1000000;
+/* Run the leak detector at exit. */
+Bool  SK_(clo_leak_check)             = False;
+/* Backtrace-matching precision used when merging leak reports. */
+VgRes SK_(clo_leak_resolution)        = Vg_LowRes;
+/* Also report pointed-to ("reachable") blocks in the leak summary. */
+Bool  SK_(clo_show_reachable)         = False;
+/* Enable workaround for stack-addressing bugs in gcc 2.96. */
+Bool  SK_(clo_workaround_gcc296_bugs) = False;
+Bool  SK_(clo_cleanup)                = True;
+
+/*------------------------------------------------------------*/
+/*--- Profiling events                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Skin-specific cost centres for Valgrind's built-in profiler,
+   numbered after the core's last one (VgpFini). */
+typedef 
+   enum { 
+      VgpCheckMem = VgpFini+1,   /* checking readability/writability */
+      VgpSetMem                  /* setting address-range permissions */
+   } 
+   VgpSkinCC;
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory checking.               ---*/
+/*------------------------------------------------------------*/
+
+/* All reads and writes are checked against a memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like this:
+
+   The top 16 bits of an address are used to index into a top-level
+   map table, containing 65536 entries.  Each entry is a pointer to a
+   second-level map, which records the accessibility and validity
+   permissions for the 65536 bytes indexed by the lower 16 bits of the
+   address.  Each byte is represented by one bit, indicating
+   accessibility.  So each second-level map contains 8192 bytes.  This
+   two-level arrangement conveniently divides the 4G address space
+   into 64k lumps, each size 64k bytes.
+
+   All entries in the primary (top-level) map must point to a valid
+   secondary (second-level) map.  Since most of the 4G of address
+   space will not be in use -- ie, not mapped at all -- there is a
+   distinguished secondary map, which indicates `not addressible and
+   not valid' writeable for all bytes.  Entries in the primary map for
+   which the entire 64k is not in use at all point at this
+   distinguished map.
+
+   [...] lots of stuff deleted due to out of date-ness
+
+   As a final optimisation, the alignment and address checks for
+   4-byte loads and stores are combined in a neat way.  The primary
+   map is extended to have 262144 entries (2^18), rather than 2^16.
+   The top 3/4 of these entries are permanently set to the
+   distinguished secondary map.  For a 4-byte load/store, the
+   top-level map is indexed not with (addr >> 16) but instead f(addr),
+   where
+
+    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
+        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
+        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
+
+   ie the lowest two bits are placed above the 16 high address bits.
+   If either of these two bits are nonzero, the address is misaligned;
+   this will select a secondary map from the upper 3/4 of the primary
+   map.  Because this is always the distinguished secondary map, a
+   (bogus) address check failure will result.  The failure handling
+   code can then figure out whether this is a genuine addr check
+   failure or whether it is a possibly-legitimate access at a
+   misaligned address.  */
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+static UInt event_ctr[N_PROF_EVENTS];
+
+/* Zero all profiling counters (VG_PROFILE_MEMORY builds only). */
+static void init_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++)
+      event_ctr[i] = 0;
+}
+
+/* Dump the non-zero profiling counters at exit; a blank line is
+   emitted before every group of ten events for readability. */
+static void done_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0) 
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False);
+
+#else
+
+static void init_prof_mem ( void ) { }
+static void done_prof_mem ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   10   alloc_secondary_map
+
+   20   get_abit
+   21   get_vbyte
+   22   set_abit
+   23   set_vbyte
+   24   get_abits4_ALIGNED
+   25   get_vbytes4_ALIGNED
+
+   30   set_address_range_perms
+   31   set_address_range_perms(lower byte loop)
+   32   set_address_range_perms(quadword loop)
+   33   set_address_range_perms(upper byte loop)
+   
+   35   make_noaccess
+   36   make_writable
+   37   make_readable
+
+   40   copy_address_range_state
+   41   copy_address_range_state(byte loop)
+   42   check_writable
+   43   check_writable(byte loop)
+   44   check_readable
+   45   check_readable(byte loop)
+   46   check_readable_asciiz
+   47   check_readable_asciiz(byte loop)
+
+   50   make_aligned_word_NOACCESS
+   51   make_aligned_word_WRITABLE
+
+   60   helperc_LOADV4
+   61   helperc_STOREV4
+   62   helperc_LOADV2
+   63   helperc_STOREV2
+   64   helperc_LOADV1
+   65   helperc_STOREV1
+
+   70   rim_rd_V4_SLOWLY
+   71   rim_wr_V4_SLOWLY
+   72   rim_rd_V2_SLOWLY
+   73   rim_wr_V2_SLOWLY
+   74   rim_rd_V1_SLOWLY
+   75   rim_wr_V1_SLOWLY
+
+   80   fpu_read
+   81   fpu_read aligned 4
+   82   fpu_read aligned 8
+   83   fpu_read 2
+   84   fpu_read 10
+
+   85   fpu_write
+   86   fpu_write aligned 4
+   87   fpu_write aligned 8
+   88   fpu_write 2
+   89   fpu_write 10
+
+   90   fpu_read_check_SLOWLY
+   91   fpu_read_check_SLOWLY(byte loop)
+   92   fpu_write_check_SLOWLY
+   93   fpu_write_check_SLOWLY(byte loop)
+
+   100  is_plausible_stack_addr
+   101  handle_esp_assignment
+   102  handle_esp_assignment(-4)
+   103  handle_esp_assignment(+4)
+   104  handle_esp_assignment(-12)
+   105  handle_esp_assignment(-8)
+   106  handle_esp_assignment(+16)
+   107  handle_esp_assignment(+12)
+   108  handle_esp_assignment(0)
+   109  handle_esp_assignment(+8)
+   110  handle_esp_assignment(-16)
+   111  handle_esp_assignment(+20)
+   112  handle_esp_assignment(-20)
+   113  handle_esp_assignment(+24)
+   114  handle_esp_assignment(-24)
+
+   120  vg_handle_esp_assignment_SLOWLY
+   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
+   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
+   123  vg_handle_esp_assignment_SLOWLY(normal)
+   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
+*/
+
+/*------------------------------------------------------------*/
+/*--- Function declarations.                               ---*/
+/*------------------------------------------------------------*/
+
+static void vgmext_ACCESS4_SLOWLY ( Addr a );
+static void vgmext_ACCESS2_SLOWLY ( Addr a );
+static void vgmext_ACCESS1_SLOWLY ( Addr a );
+static void fpu_ACCESS_check_SLOWLY ( Addr addr, Int size );
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+/* A secondary map: one A (accessibility) bit per byte for a 64k chunk
+   of address space, hence 65536/8 == 8192 bytes per map. */
+typedef 
+   struct {
+      UChar abits[8192];
+   }
+   AcSecMap;
+
+/* Primary map: 2^18 entries, not 2^16 — the upper 3/4 permanently
+   point at the distinguished map so the alignment trick (see the big
+   comment above) turns misaligned 4-byte accesses into A-bit faults. */
+static AcSecMap* primary_map[ /*65536*/ 262144 ];
+/* The shared "all inaccessible" secondary map for unused regions. */
+static AcSecMap  distinguished_secondary_map;
+
+#define IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+#define ENSURE_MAPPABLE(addr,caller)                                   \
+   do {                                                                \
+      if (IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller); \
+         /* VG_(printf)("new 2map because of %p\n", addr); */          \
+      }                                                                \
+   } while(0)
+
+#define BITARR_SET(aaa_p,iii_p)                         \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] |= (1 << (iii & 7));                \
+   } while (0)
+
+#define BITARR_CLEAR(aaa_p,iii_p)                       \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
+   } while (0)
+
+#define BITARR_TEST(aaa_p,iii_p)                        \
+      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
+               & (1 << (((UInt)iii_p) & 7))))           \
+
+
+#define VGM_BIT_VALID      0
+#define VGM_BIT_INVALID    1
+
+#define VGM_NIBBLE_VALID   0
+#define VGM_NIBBLE_INVALID 0xF
+
+#define VGM_BYTE_VALID     0
+#define VGM_BYTE_INVALID   0xFF
+
+#define VGM_WORD_VALID     0
+#define VGM_WORD_INVALID   0xFFFFFFFF
+
+#define VGM_EFLAGS_VALID   0xFFFFFFFE
+#define VGM_EFLAGS_INVALID 0xFFFFFFFF     /* not used */
+
+
+/* Set up shadow memory: mark everything inaccessible by pointing every
+   primary-map entry at the distinguished secondary map. */
+static void init_shadow_memory ( void )
+{
+   Int i;
+
+   for (i = 0; i < 8192; i++)             /* Invalid address */
+      distinguished_secondary_map.abits[i] = VGM_BYTE_INVALID; 
+
+   /* These entries gradually get overwritten as the used address
+      space expands. */
+   for (i = 0; i < 65536; i++)
+      primary_map[i] = &distinguished_secondary_map;
+
+   /* These ones should never change; it's a bug in Valgrind if they do. */
+   for (i = 65536; i < 262144; i++)
+      primary_map[i] = &distinguished_secondary_map;
+}
+
+/* Hook called after command-line processing; nothing to do here. */
+void SK_(post_clo_init) ( void )
+{
+}
+
+/* Skin shutdown: print allocator stats, optionally run the leak
+   detector, and flush the memory-profiling counters. */
+void SK_(fini) ( void )
+{
+   VG_(print_malloc_stats)();
+
+   if (VG_(clo_verbosity) == 1) {
+      if (!SK_(clo_leak_check))
+         VG_(message)(Vg_UserMsg, 
+             "For a detailed leak analysis,  rerun with: --leak-check=yes");
+
+      VG_(message)(Vg_UserMsg, 
+                   "For counts of detected errors, rerun with: -v");
+   }
+   if (SK_(clo_leak_check)) SK_(detect_memory_leaks)();
+
+   done_prof_mem();
+}
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map. */
+
+/* Allocate and initialise a secondary map (all bytes marked
+   inaccessible).  `caller' is only used for mmap bookkeeping. */
+static AcSecMap* alloc_secondary_map ( __attribute__ ((unused)) 
+                                       Char* caller )
+{
+   AcSecMap* map;
+   UInt  i;
+   PROF_EVENT(10);
+
+   /* Mark all bytes as invalid access and invalid value. */
+
+   /* It just happens that a AcSecMap occupies exactly 18 pages --
+      although this isn't important, so the following assert is
+      spurious. */
+   vg_assert(0 == (sizeof(AcSecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(AcSecMap), caller );
+
+   for (i = 0; i < 8192; i++)
+      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
+
+   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
+   return map;
+}
+
+
+/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
+
+/* Read the A bit for byte `a'.  A set bit in the map means
+   inaccessible, so the result is inverted to VGM_BIT_{IN}VALID. */
+static __inline__ UChar get_abit ( Addr a )
+{
+   AcSecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(20);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (A)map! 0x%x\n", a);
+#  endif
+   return BITARR_TEST(sm->abits, sm_off) 
+             ? VGM_BIT_INVALID : VGM_BIT_VALID;
+}
+
+/* Write the A bit for byte `a', allocating a real secondary map first
+   if this 64k chunk still points at the distinguished one. */
+static __inline__ void set_abit ( Addr a, UChar abit )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(22);
+   ENSURE_MAPPABLE(a, "set_abit");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   if (abit) 
+      BITARR_SET(sm->abits, sm_off);
+   else
+      BITARR_CLEAR(sm->abits, sm_off);
+}
+
+
+/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
+
+/* Fetch the 4 A bits covering the 4-aligned word at `a' as the low
+   nibble of the result (0x0 == all accessible). */
+static __inline__ UChar get_abits4_ALIGNED ( Addr a )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   abits8;
+   PROF_EVENT(24);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   abits8 = sm->abits[sm_off >> 3];
+   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+   abits8 &= 0x0F;
+   return abits8;
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Setting permissions over address ranges.             ---*/
+/*------------------------------------------------------------*/
+
+/* Set the A bits over [a, a+len) to `example_a_bit'.  The fast path
+   handles 8-byte-aligned runs a whole map byte at a time; ragged ends
+   are done bit-by-bit with set_abit. */
+static void set_address_range_perms ( Addr a, UInt len, 
+                                      UInt example_a_bit )
+{
+   UChar     abyte8;
+   UInt      sm_off;
+   AcSecMap* sm;
+
+   PROF_EVENT(30);
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000) {
+      VG_(message)(Vg_UserMsg, 
+                   "Warning: set address range perms: "
+                   "large range %u, a %d",
+                   len, example_a_bit );
+   }
+
+   VGP_PUSHCC(VgpSetMem);
+
+   /* Requests to change permissions of huge address ranges may
+      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
+      far all legitimate requests have fallen beneath that size. */
+   /* 4 Mar 02: this is just stupid; get rid of it. */
+   /* vg_assert(len < 30000000); */
+
+   /* Check the permissions make sense. */
+   vg_assert(example_a_bit == VGM_BIT_VALID 
+             || example_a_bit == VGM_BIT_INVALID);
+
+   /* In order that we can charge through the address space at 8
+      bytes/main-loop iteration, make up some perms. */
+   abyte8 = (example_a_bit << 7)
+            | (example_a_bit << 6)
+            | (example_a_bit << 5)
+            | (example_a_bit << 4)
+            | (example_a_bit << 3)
+            | (example_a_bit << 2)
+            | (example_a_bit << 1)
+            | (example_a_bit << 0);
+
+#  ifdef VG_DEBUG_MEMORY
+   /* Do it ... */
+   /* Bug fix: this debug path called set_vbyte(a, vbyte), which was
+      left over from the memcheck version; this skin has no V bits and
+      neither set_vbyte nor vbyte exists here, so it didn't compile
+      with VG_DEBUG_MEMORY defined. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }
+
+#  else
+   /* Slowly do parts preceding 8-byte alignment. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      if ((a % 8) == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }   
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0);
+
+   /* Once aligned, go fast. */
+   while (True) {
+      PROF_EVENT(32);
+      if (len < 8) break;
+      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
+      sm = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      sm->abits[sm_off >> 3] = abyte8;
+      a += 8;
+      len -= 8;
+   }
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0 && len < 8);
+
+   /* Finish the upper fragment. */
+   while (True) {
+      PROF_EVENT(33);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }   
+#  endif
+
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Set permissions for address ranges ... */
+
+/* Mark [a, a+len) inaccessible. */
+void SK_(make_noaccess) ( Addr a, UInt len )
+{
+   PROF_EVENT(35);
+   DEBUG("SK_(make_noaccess)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) accessible. */
+void SK_(make_accessible) ( Addr a, UInt len )
+{
+   PROF_EVENT(36);
+   DEBUG("SK_(make_accessible)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID );
+}
+
+/* Block-copy permissions (needed for implementing realloc()). */
+
+/* Copy A bits byte-by-byte from [src, src+len) to [dst, dst+len);
+   used by realloc().  NOTE(review): copies low-to-high, so presumably
+   callers never pass overlapping ranges with dst inside src. */
+static void copy_address_range_state ( Addr src, Addr dst, UInt len )
+{
+   UInt i;
+
+   DEBUG("copy_address_range_state\n");
+
+   PROF_EVENT(40);
+   for (i = 0; i < len; i++) {
+      UChar abit  = get_abit ( src+i );
+      PROF_EVENT(41);
+      set_abit ( dst+i, abit );
+   }
+}
+
+
+/* Check permissions for address range.  If inadequate permissions
+   exist, *bad_addr is set to the offending address, so the caller can
+   know what it is. */
+
+/* Is every byte of [a, a+len) accessible (writable)?  On failure the
+   first bad address is stored in *bad_addr (if non-NULL). */
+Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+   PROF_EVENT(42);
+   for (i = 0; i < len; i++) {
+      PROF_EVENT(43);
+      abit = get_abit(a);
+      if (abit == VGM_BIT_INVALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+/* Is every byte of [a, a+len) accessible (readable)?  In this A-bits-
+   only skin this is the same test as check_writable.  On failure the
+   first bad address is stored in *bad_addr (if non-NULL). */
+Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+
+   PROF_EVENT(44);
+   DEBUG("SK_(check_readable)\n");
+   for (i = 0; i < len; i++) {
+      abit  = get_abit(a);
+      PROF_EVENT(45);
+      if (abit != VGM_BIT_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so. */
+
+/* Check a NUL-terminated string starting at `a': each byte's A bit is
+   verified BEFORE the byte itself is read to look for the terminator,
+   so we never dereference unaddressible memory. */
+Bool SK_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+{
+   UChar abit;
+   PROF_EVENT(46);
+   DEBUG("SK_(check_readable_asciiz)\n");
+   while (True) {
+      PROF_EVENT(47);
+      abit  = get_abit(a);
+      if (abit != VGM_BIT_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      /* Ok, a is safe to read. */
+      if (* ((UChar*)a) == 0) return True;
+      a++;
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Memory event handlers                                ---*/
+/*------------------------------------------------------------*/
+
+/* Setting permissions for aligned words.  This supports fast stack
+   operations. */
+
+/* Fast path for the stack: mark `len' bytes at 4-aligned `a' as
+   inaccessible, one nibble of the A-bit map per word. */
+static void make_noaccess_aligned ( Addr a, UInt len )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(50);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_noaccess_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (1s). */
+      sm->abits[sm_off >> 3] |= mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Fast path for the stack: mark `len' bytes at 4-aligned `a' as
+   accessible, clearing one nibble of the A-bit map per word. */
+static void make_writable_aligned ( Addr a, UInt len )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(51);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_writable_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (0s). */
+      sm->abits[sm_off >> 3] &= ~mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+
+/* Core callback: verify [base, base+size) is writable; on failure
+   record the appropriate error kind for the requesting core part. */
+static
+void check_is_writable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   /* VG_(message)(Vg_DebugMsg,"check is writable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_writable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/True, s );
+         break;
+
+      case Vg_CorePThread:
+      case Vg_CoreSignal:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/True, s );
+         break;
+
+      default:
+         /* Bug fix: the panic message previously named
+            check_is_readable, which would mislead anyone debugging
+            a crash here. */
+         VG_(panic)("check_is_writable: Unknown or unexpected CorePart");
+      }
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: verify [base, base+size) is readable; on failure
+   record an error appropriate to the requesting core part (including
+   jump errors for Vg_CoreTranslate). */
+static
+void check_is_readable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{     
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+   
+   /* VG_(message)(Vg_DebugMsg,"check is readable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_readable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/False, s );
+         break;
+      
+      case Vg_CorePThread:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/False, s );
+         break;
+
+      /* If we're being asked to jump to a silly address, record an error 
+         message before potentially crashing the entire system. */
+      case Vg_CoreTranslate:
+         SK_(record_jump_error)( tst, bad_addr );
+         break;
+
+      default:
+         VG_(panic)("check_is_readable: Unknown or unexpected CorePart");
+      }
+   }
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: verify the NUL-terminated string at `str' is fully
+   readable.  Only syscall arguments use this path (asserted). */
+static
+void check_is_readable_asciiz ( CorePart part, ThreadState* tst,
+                                Char* s, UInt str )
+{
+   Bool ok = True;
+   Addr bad_addr;
+   /* VG_(message)(Vg_DebugMsg,"check is readable asciiz: 0x%x",str); */
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   vg_assert(part == Vg_CoreSysCall);
+   ok = SK_(check_readable_asciiz) ( (Addr)str, &bad_addr );
+   if (!ok) {
+      SK_(record_param_error) ( tst, bad_addr, /*is_writable =*/False, s );
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Event handler: memory present at startup.  The rr/ww/xx permission
+   flags are ignored and the range simply made accessible. */
+static
+void addrcheck_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   DEBUG("new_mem_startup(%p, %u, rr=%u, ww=%u, xx=%u)\n", a,len,rr,ww,xx);
+   SK_(make_accessible)(a, len);
+}
+
+/* Event handler: new heap memory.  `is_inited' is irrelevant here —
+   this skin tracks only addressibility, not initialisation. */
+static
+void addrcheck_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   SK_(make_accessible)(a, len);
+}
+
+/* Event handler: mprotect-style permission change.  Any of r/w/x set
+   means accessible; all clear (or nn) means inaccessible. */
+static
+void addrcheck_set_perms (Addr a, UInt len, 
+                         Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   DEBUG("addrcheck_set_perms(%p, %u, nn=%u, rr=%u ww=%u, xx=%u)\n",
+                              a, len, nn, rr, ww, xx);
+   if (rr || ww || xx) {
+      SK_(make_accessible)(a, len);
+   } else {
+      SK_(make_noaccess)(a, len);
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Functions called directly from generated code.       ---*/
+/*------------------------------------------------------------*/
+
+/* Rotate a 32-bit word right by 16 bits; used to move an address's low
+   bits above its high 16 bits for the extended-primary-map trick. */
+static __inline__ UInt rotateRight16 ( UInt x )
+{
+   /* Amazingly, gcc turns this into a single rotate insn. */
+   return (x >> 16) | (x << 16);
+}
+
+
+/* Plain primary-map index for byte-sized accesses: top 16 bits. */
+static __inline__ UInt shiftRight16 ( UInt x )
+{
+   return x >> 16;
+}
+
+
+/* Read/write 1/2/4 sized V bytes, and emit an address error if
+   needed. */
+
+/* SK_(helperc_ACCESS{1,2,4}) handle the common case fast.
+   Under all other circumstances, it defers to the relevant _SLOWLY
+   function, which can handle all situations.
+*/
+/* 4-byte access check, called from generated code.  The rotate-and-
+   mask index sends misaligned addresses into the upper (distinguished)
+   3/4 of the primary map, so they fail the nibble test and fall into
+   the slow path, which sorts out genuine errors from misalignment. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS4) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS4_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   AcSecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(60);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible.  So just return. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS4_SLOWLY(a);
+   }
+#  endif
+}
+
+/* 2-byte access check.  Fast path requires the whole containing map
+   byte (8 bytes of memory) to be accessible; anything else — including
+   odd addresses, caught by the 0x1FFFF rotate mask — goes slow. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS2) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS2_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   AcSecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(62);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS2_SLOWLY(a);
+   }
+#  endif
+}
+
+/* 1-byte access check.  No alignment concerns, so a plain shift
+   indexes the primary map; fast path as for ACCESS2. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS1) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS1_SLOWLY(a);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   AcSecMap* sm   = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(64);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS1_SLOWLY(a);
+   }
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Fallback functions to handle cases that the above    ---*/
+/*--- VG_(helperc_ACCESS{1,2,4}) can't manage.             ---*/
+/*------------------------------------------------------------*/
+
+/* General 4-byte access check.  Checks each byte's A bit separately,
+   then distinguishes fully valid / fully invalid / partially valid;
+   partial validity is tolerated only for 4-aligned accesses when
+   --partial-loads-ok is in force. */
+static void vgmext_ACCESS4_SLOWLY ( Addr a )
+{
+   Bool a0ok, a1ok, a2ok, a3ok;
+
+   PROF_EVENT(70);
+
+   /* First establish independently the addressibility of the 4 bytes
+      involved. */
+   a0ok = get_abit(a+0) == VGM_BIT_VALID;
+   a1ok = get_abit(a+1) == VGM_BIT_VALID;
+   a2ok = get_abit(a+2) == VGM_BIT_VALID;
+   a3ok = get_abit(a+3) == VGM_BIT_VALID;
+
+   /* Now distinguish 3 cases */
+
+   /* Case 1: the address is completely valid, so:
+      - no addressing error
+   */
+   if (a0ok && a1ok && a2ok && a3ok) {
+      return;
+   }
+
+   /* Case 2: the address is completely invalid.  
+      - emit addressing error
+   */
+   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
+   if (!SK_(clo_partial_loads_ok) 
+       || ((a & 3) != 0)
+       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
+      sk_record_address_error( a, 4, False );
+      return;
+   }
+
+   /* Case 3: the address is partially valid.  
+      - no addressing error
+      Case 3 is only allowed if SK_(clo_partial_loads_ok) is True
+      (which is the default), and the address is 4-aligned.  
+      If not, Case 2 will have applied.
+   */
+   vg_assert(SK_(clo_partial_loads_ok));
+   {
+      return;
+   }
+}
+
+/* General 2-byte access check: either byte inaccessible => report a
+   size-2 address error. */
+static void vgmext_ACCESS2_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(72);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      sk_record_address_error( a, 2, False );
+   }
+}
+
+/* General 1-byte access check: byte inaccessible => report a size-1
+   address error. */
+static void vgmext_ACCESS1_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(74);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      sk_record_address_error( a, 1, False );
+   }
+}
+
+
+/* ---------------------------------------------------------------------
+   FPU load and store checks, called from generated code.
+   ------------------------------------------------------------------ */
+
+/* FPU load/store address check, called from generated code.  Fast
+   paths cover aligned 4- and 8-byte accesses (one or two whole map
+   bytes); all other sizes, and anything not on the fast path, defer
+   to fpu_ACCESS_check_SLOWLY. */
+__attribute__ ((regparm(2)))
+void SK_(fpu_ACCESS_check) ( Addr addr, Int size )
+{
+   /* Ensure the read area is both addressible and valid (ie,
+      readable).  If there's an address error, don't report a value
+      error too; but if there isn't an address error, check for a
+      value error. 
+
+      Try to be reasonably fast on the common case; wimp out and defer
+      to fpu_ACCESS_check_SLOWLY for everything else.  */
+
+   AcSecMap* sm;
+   UInt    sm_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(80);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_ACCESS_check_SLOWLY ( addr, size );
+#  else
+
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(81);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible. */
+      return;
+     slow4:
+      fpu_ACCESS_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(82);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible. */
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      /* Both halves properly aligned and addressible. */
+      return;
+     slow8:
+      fpu_ACCESS_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(83);
+      fpu_ACCESS_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   /* 10 == x87 extended real; 28/108 == FP environment / full state
+      images (fnstenv/fnsave operand sizes). */
+   if (size == 10) {
+      PROF_EVENT(84);
+      fpu_ACCESS_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(84); /* XXX assign correct event number */
+      fpu_ACCESS_check_SLOWLY ( addr, size );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("fpu_ACCESS_check: unhandled size");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Slow, general cases for FPU access checks.
+   ------------------------------------------------------------------ */
+
+/* Byte-at-a-time fallback: walk every byte of [addr, addr+size) and,
+   if any byte is unaddressible, report a single address error
+   covering the whole range. */
+void fpu_ACCESS_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Bool aerr = False;
+   PROF_EVENT(90);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(91);
+      if (get_abit(addr+i) != VGM_BIT_VALID)
+         aerr = True;
+   }
+
+   if (aerr) {
+      sk_record_address_error( addr, size, False );
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks info                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Stash an ExeContext (where allocated / freed) in the chunk's first
+   skin-private word. */
+static __inline__
+void set_where( ShadowChunk* sc, ExeContext* ec )
+{
+   sc->skin_extra[0] = (UInt)ec;
+}
+
+/* Retrieve the ExeContext previously stored by set_where(). */
+static __inline__
+ExeContext *get_where( ShadowChunk* sc )
+{
+   return (ExeContext*)sc->skin_extra[0];
+}
+
+/* Core callback: a shadow chunk has just been created for tst's
+   allocation; record the allocation site in it. */
+void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst )
+{
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+}
+
+/*------------------------------------------------------------*/
+/*--- Postponing free()ing                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Holds blocks after freeing. */
+static ShadowChunk* vg_freed_list_start   = NULL;
+static ShadowChunk* vg_freed_list_end     = NULL;
+static Int          vg_freed_list_volume  = 0;
+
+/* Debug aid: count the chunks currently parked on the freed list. */
+static __attribute__ ((unused))
+       Int count_freelist ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n++;
+   return n;
+}
+
+/* Debug aid: assert that the cached byte total (vg_freed_list_volume)
+   matches the sum of the sizes actually on the list. */
+static __attribute__ ((unused))
+       void freelist_sanity ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   /* VG_(printf)("freelist sanity\n"); */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n += sc->size;
+   vg_assert(n == vg_freed_list_volume);
+}
+
+/* Put a shadow chunk on the freed blocks queue, possibly freeing up
+   some of the oldest blocks in the queue at the same time. */
+static void add_to_freed_queue ( ShadowChunk* sc )
+{
+   ShadowChunk* sc1;
+
+   /* Put it at the end of the freed list */
+   if (vg_freed_list_end == NULL) {
+      vg_assert(vg_freed_list_start == NULL);
+      vg_freed_list_end = vg_freed_list_start = sc;
+      vg_freed_list_volume = sc->size;
+   } else {    
+      vg_assert(vg_freed_list_end->next == NULL);
+      vg_freed_list_end->next = sc;
+      vg_freed_list_end = sc;
+      vg_freed_list_volume += sc->size;
+   }
+   sc->next = NULL;
+
+   /* Release enough of the oldest blocks to bring the free queue
+      volume below vg_clo_freelist_vol. */
+   
+   while (vg_freed_list_volume > SK_(clo_freelist_vol)) {
+      /* freelist_sanity(); */
+      vg_assert(vg_freed_list_start != NULL);
+      vg_assert(vg_freed_list_end != NULL);
+
+      sc1 = vg_freed_list_start;
+      vg_freed_list_volume -= sc1->size;
+      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
+      vg_assert(vg_freed_list_volume >= 0);
+
+      if (vg_freed_list_start == vg_freed_list_end) {
+         vg_freed_list_start = vg_freed_list_end = NULL;
+      } else {
+         vg_freed_list_start = sc1->next;
+      }
+      sc1->next = NULL; /* just paranoia */
+      VG_(freeShadowChunk) ( sc1 );
+   }
+}
+
+/* Return the first shadow chunk satisfying the predicate p. */
+ShadowChunk* SK_(any_matching_freed_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
+{
+   ShadowChunk* sc;
+
+   /* No point looking through freed blocks if we're not keeping
+      them around for a while... */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      if (p(sc))
+         return sc;
+
+   return NULL;
+}
+
+/* Alternative free(): rather than releasing the block immediately,
+   note where it was freed and park it on the freed queue so that
+   accesses to recently-freed memory can still be detected. */
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   /* Record where freed */
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+
+   /* Put it out of harm's way for a while. */
+   add_to_freed_queue ( sc );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Our instrumenter                                     ---*/
+/*------------------------------------------------------------*/
+
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define newTemp   VG_(getNewTemp)
+
+/* Instrument one basic block: before every memory-referencing UInstr
+   (LOAD/STORE/FPU_R/FPU_W), insert a C call to the appropriate
+   SK_(helperc_ACCESS*) / SK_(fpu_ACCESS_check) helper which checks
+   the addressibility of the accessed range.  All other UInstrs are
+   copied through unchanged.  Consumes cb_in and returns a fresh
+   UCodeBlock. */
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+/* Use this rather than eg. -1 because it's a UInt. */
+/* NOTE(review): INVALID_DATA_SIZE is defined but not used below;
+   t_addr/t_size are initialised with INVALID_TEMPREG instead. */
+#define INVALID_DATA_SIZE   999999
+
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u_in;
+   Int         t_addr, t_size;
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+
+      t_addr = t_size = INVALID_TEMPREG;
+      u_in = &cb_in->instrs[i];
+
+      switch (u_in->opcode) {
+         case NOP:  case CALLM_E:  case CALLM_S:
+            break;
+
+         /* For memory-ref instrs, copy the data_addr into a temporary to be
+          * passed to the SK_(helperc_ACCESS*) helper before the instruction.
+          */
+         case LOAD: 
+            t_addr = u_in->val1; 
+            goto do_LOAD_or_STORE;
+         case STORE: t_addr = u_in->val2;
+            goto do_LOAD_or_STORE;
+           do_LOAD_or_STORE:
+            /* One-arg C call, helper selected by access width. */
+            uInstr1(cb, CCALL, 0, TempReg, t_addr);
+            switch (u_in->size) {
+               case 4: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS4), 
+                                               1, 1, False );
+                  break;
+               case 2: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS2), 
+                                               1, 1, False );
+                  break;
+               case 1: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS1), 
+                                               1, 1, False );
+                  break;
+               default: 
+                  VG_(panic)("addrcheck::SK_(instrument):LOAD/STORE");
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case FPU_R:
+         case FPU_W:
+            /* FPU accesses have a run-time-variable size, so pass it
+               to the helper as a second (literal) argument. */
+            t_addr = u_in->val2;
+            t_size = newTemp(cb);
+	    uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_size);
+	    uLiteral(cb, u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, t_addr, TempReg, t_size);
+            VG_(setCCallFields)(cb, (Addr)&SK_(fpu_ACCESS_check), 
+                                               2, 2, False );
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         default:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Low-level address-space scanning, for the leak       ---*/
+/*--- detector.                                            ---*/
+/*------------------------------------------------------------*/
+
+static 
+jmp_buf memscan_jmpbuf;
+
+/* SIGSEGV/SIGBUS handler installed during the memory scan: a fault
+   while probing a page longjmps back so the page can be skipped. */
+static
+void vg_scan_all_valid_memory_sighandler ( Int sigNo )
+{
+   __builtin_longjmp(memscan_jmpbuf, 1);
+}
+
+/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
+   space and pass the addresses and values of all addressible,
+   defined, aligned words to notify_word.  This is the basis for the
+   leak detector.  Returns the number of calls made to notify_word.  */
+UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
+{
+   /* All volatile, because some gccs seem paranoid about longjmp(). */
+   volatile UInt res, numPages, page, primaryMapNo, nWordsNotified;
+   volatile Addr pageBase, addr;
+   volatile AcSecMap* sm;
+   volatile UChar abits;
+   volatile UInt page_first_word;
+
+   vki_ksigaction sigbus_saved;
+   vki_ksigaction sigbus_new;
+   vki_ksigaction sigsegv_saved;
+   vki_ksigaction sigsegv_new;
+   vki_ksigset_t  blockmask_saved;
+   vki_ksigset_t  unblockmask_new;
+
+   /* Temporarily install a new sigsegv and sigbus handler, and make
+      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
+      first two can never be blocked anyway?)  */
+
+   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigbus_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
+   vg_assert(res == 0);
+
+   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigsegv_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
+   vg_assert(res == 0+0);
+
+   res =  VG_(ksigemptyset)( &unblockmask_new );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
+   vg_assert(res == 0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
+   vg_assert(res == 0+0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
+   vg_assert(res == 0+0+0+0+0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
+   vg_assert(res == 0+0+0+0+0+0);
+
+   /* The signal handlers are installed.  Actually do the memory scan. */
+   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
+   vg_assert(numPages == 1048576);
+   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
+
+   nWordsNotified = 0;
+
+   for (page = 0; page < numPages; page++) {
+      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
+      primaryMapNo = pageBase >> 16;
+      sm = primary_map[primaryMapNo];
+      if (IS_DISTINGUISHED_SM(sm)) continue;
+      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
+         /* try this ... */
+         page_first_word = * (volatile UInt*)pageBase;
+         /* we get here if we didn't get a fault */
+         /* Scan the page */
+         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
+            abits  = get_abits4_ALIGNED(addr);
+            if (abits == VGM_NIBBLE_VALID) {
+               nWordsNotified++;
+               notify_word ( addr, *(UInt*)addr );
+	    }
+         }
+      } else {
+         /* We get here if reading the first word of the page caused a
+            fault, which in turn caused the signal handler to longjmp.
+            Ignore this page. */
+         if (0)
+         VG_(printf)(
+            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
+            (void*)pageBase 
+         );
+      }
+   }
+
+   /* Restore signal state to whatever it was before. */
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
+   vg_assert(res == 0 +0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
+   vg_assert(res == 0 +0 +0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
+   vg_assert(res == 0 +0 +0 +0);
+
+   return nWordsNotified;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
+/*------------------------------------------------------------*/
+
+/* A block is either 
+   -- Proper-ly reached; a pointer to its start has been found
+   -- Interior-ly reached; only an interior pointer to it has been found
+   -- Unreached; so far, no pointers to any part of it have been found. 
+*/
+typedef 
+   enum { Unreached, Interior, Proper } 
+   Reachedness;
+
+/* A block record, used for generating err msgs. */
+typedef
+   struct _LossRecord {
+      struct _LossRecord* next;
+      /* Where these lost blocks were allocated. */
+      ExeContext*  allocated_at;
+      /* Their reachability. */
+      Reachedness  loss_mode;
+      /* Number of blocks and total # bytes involved. */
+      UInt         total_bytes;
+      UInt         num_blocks;
+   }
+   LossRecord;
+
+
+/* Find the i such that ptr points at or inside the block described by
+   shadows[i].  Return -1 if none found.  This assumes that shadows[]
+   has been sorted on the ->data field. */
+
+#ifdef VG_DEBUG_LEAKCHECK
+/* Used to sanity-check the fast binary-search mechanism.  Linear
+   scan: return the index of the chunk containing ptr (inclusive of
+   both ends), or -1 if no chunk contains it. */
+static Int find_shadow_for_OLD ( Addr          ptr, 
+                                 ShadowChunk** shadows,
+                                 Int           n_shadows )
+
+{
+   Int  i;
+   Addr a_lo, a_hi;
+   PROF_EVENT(70);
+   for (i = 0; i < n_shadows; i++) {
+      PROF_EVENT(71);
+      a_lo = shadows[i]->data;
+      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
+      if (a_lo <= ptr && ptr <= a_hi)
+         return i;
+   }
+   return -1;
+}
+#endif
+
+
+/* Binary search for the i such that ptr points at or inside the block
+   described by shadows[i]; returns -1 if none found.  Requires
+   shadows[] to be sorted on the ->data field (see
+   sort_malloc_shadows). */
+static Int find_shadow_for ( Addr          ptr, 
+                             ShadowChunk** shadows,
+                             Int           n_shadows )
+{
+   Addr a_mid_lo, a_mid_hi;
+   Int lo, mid, hi, retVal;
+   PROF_EVENT(70);
+   /* VG_(printf)("find shadow for %p = ", ptr); */
+   retVal = -1;
+   lo = 0;
+   hi = n_shadows-1;
+   while (True) {
+      PROF_EVENT(71);
+
+      /* invariant: current unsearched space is from lo to hi,
+         inclusive. */
+      if (lo > hi) break; /* not found */
+
+      mid      = (lo + hi) / 2;
+      /* [a_mid_lo, a_mid_hi] is the (inclusive) extent of chunk mid. */
+      a_mid_lo = shadows[mid]->data;
+      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
+
+      if (ptr < a_mid_lo) {
+         hi = mid-1;
+         continue;
+      } 
+      if (ptr > a_mid_hi) {
+         lo = mid+1;
+         continue;
+      }
+      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
+      retVal = mid;
+      break;
+   }
+
+#  ifdef VG_DEBUG_LEAKCHECK
+   /* Cross-check the fast search against the linear version. */
+   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
+#  endif
+   /* VG_(printf)("%d\n", retVal); */
+   return retVal;
+}
+
+
+
+/* Sort shadows[0 .. n_shadows-1] in ascending order of ->data, using
+   a shellsort with the h = 3h+1 gap sequence (1, 4, 13, 40, ...).
+   The largest gap (2391484) bounds the usable input size. */
+static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
+{
+   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                      9841, 29524, 88573, 265720,
+                      797161, 2391484 };
+   Int          lo = 0;
+   Int          hi = n_shadows-1;
+   Int          i, j, h, bigN, hp;
+   ShadowChunk* v;
+
+   PROF_EVENT(72);
+   bigN = hi - lo + 1; if (bigN < 2) return;
+   /* Pick the largest gap smaller than the array size. */
+   hp = 0; while (incs[hp] < bigN) hp++; hp--;
+
+   for (; hp >= 0; hp--) {
+      PROF_EVENT(73);
+      h = incs[hp];
+      i = lo + h;
+      /* Insertion sort with stride h. */
+      while (1) {
+         PROF_EVENT(74);
+         if (i > hi) break;
+         v = shadows[i];
+         j = i;
+         while (shadows[j-h]->data > v->data) {
+            PROF_EVENT(75);
+            shadows[j] = shadows[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         shadows[j] = v;
+         i++;
+      }
+   }
+}
+
+/* Globals, for the callback used by SK_(detect_memory_leaks). */
+
+static ShadowChunk** vglc_shadows;
+static Int           vglc_n_shadows;
+static Reachedness*  vglc_reachedness;
+static Addr          vglc_min_mallocd_addr;
+static Addr          vglc_max_mallocd_addr;
+
+/* Callback from VG_(scan_all_valid_memory): word_at_a was read from
+   address a.  If it looks like a pointer to (or into) some malloc'd
+   block, upgrade that block's reachability in vglc_reachedness. */
+static 
+void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
+{
+   Int  sh_no;
+   Addr ptr;
+
+   /* Rule out some known causes of bogus pointers.  Mostly these do
+      not cause much trouble because only a few false pointers can
+      ever lurk in these places.  This mainly stops it reporting that
+      blocks are still reachable in stupid test programs like this
+
+         int main (void) { char* a = malloc(100); return 0; }
+
+      which people seem inordinately fond of writing, for some reason.  
+
+      Note that this is a complete kludge.  It would be better to
+      ignore any addresses corresponding to valgrind.so's .bss and
+      .data segments, but I cannot think of a reliable way to identify
+      where the .bss segment has been put.  If you can, drop me a
+      line.  
+   */
+   if (VG_(within_stack)(a))                return;
+   if (VG_(within_m_state_static)(a))       return;
+   if (a == (Addr)(&vglc_min_mallocd_addr)) return;
+   if (a == (Addr)(&vglc_max_mallocd_addr)) return;
+
+   /* OK, let's get on and do something Useful for a change. */
+
+   ptr = (Addr)word_at_a;
+   /* Cheap range filter before the binary search. */
+   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
+      /* Might be legitimate; we'll have to investigate further. */
+      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
+      if (sh_no != -1) {
+         /* Found a block at/into which ptr points. */
+         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
+         vg_assert(ptr < vglc_shadows[sh_no]->data 
+                         + vglc_shadows[sh_no]->size);
+         /* Decide whether Proper-ly or Interior-ly reached.  A Proper
+            sighting always wins; Interior never downgrades Proper. */
+         if (ptr == vglc_shadows[sh_no]->data) {
+            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
+            vglc_reachedness[sh_no] = Proper;
+         } else {
+            if (vglc_reachedness[sh_no] == Unreached)
+               vglc_reachedness[sh_no] = Interior;
+         }
+      }
+   }
+}
+
+
+/* Top-level leak check: snapshot all live malloc'd blocks, scan the
+   whole address space for pointers to them, classify each block as
+   Unreached / Interior / Proper, print a summary, then common up the
+   lost blocks into loss records and report them (smallest byte-count
+   first). */
+void SK_(detect_memory_leaks) ( void )
+{
+   Int    i;
+   Int    blocks_leaked, bytes_leaked;
+   Int    blocks_dubious, bytes_dubious;
+   Int    blocks_reachable, bytes_reachable;
+   Int    n_lossrecords;
+   UInt   bytes_notified;
+   
+   LossRecord*  errlist;
+   LossRecord*  p;
+
+   PROF_EVENT(76);
+
+   /* VG_(get_malloc_shadows) allocates storage for shadows */
+   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
+   if (vglc_n_shadows == 0) {
+      vg_assert(vglc_shadows == NULL);
+      VG_(message)(Vg_UserMsg, 
+                   "No malloc'd blocks -- no leaks are possible.\n");
+      return;
+   }
+
+   VG_(message)(Vg_UserMsg, 
+                "searching for pointers to %d not-freed blocks.", 
+                vglc_n_shadows );
+   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
+
+   /* Sanity check; assert that the blocks are now in order and that
+      they don't overlap. */
+   for (i = 0; i < vglc_n_shadows-1; i++) {
+      vg_assert( ((Addr)vglc_shadows[i]->data)
+                 < ((Addr)vglc_shadows[i+1]->data) );
+      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
+                 < ((Addr)vglc_shadows[i+1]->data) );
+   }
+
+   /* Bounds for the cheap pointer filter in the scan callback. */
+   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
+   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
+                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
+
+   vglc_reachedness 
+      = VG_(malloc)( vglc_n_shadows * sizeof(Reachedness) );
+   for (i = 0; i < vglc_n_shadows; i++)
+      vglc_reachedness[i] = Unreached;
+
+   /* Do the scan of memory. */
+   bytes_notified
+       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
+         * VKI_BYTES_PER_WORD;
+
+   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
+
+   /* Tally blocks/bytes per reachability class. */
+   blocks_leaked    = bytes_leaked    = 0;
+   blocks_dubious   = bytes_dubious   = 0;
+   blocks_reachable = bytes_reachable = 0;
+
+   for (i = 0; i < vglc_n_shadows; i++) {
+      if (vglc_reachedness[i] == Unreached) {
+         blocks_leaked++;
+         bytes_leaked += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Interior) {
+         blocks_dubious++;
+         bytes_dubious += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Proper) {
+         blocks_reachable++;
+         bytes_reachable += vglc_shadows[i]->size;
+      }
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+
+
+   /* Common up the lost blocks so we can print sensible error
+      messages. */
+
+   n_lossrecords = 0;
+   errlist       = NULL;
+   for (i = 0; i < vglc_n_shadows; i++) {
+     
+      /* 'where' stored in 'skin_extra' field */
+      ExeContext* where = get_where ( vglc_shadows[i] );
+
+      /* Merge this block into an existing record with the same
+         reachability and (up to clo_leak_resolution) the same
+         allocation context, if there is one. */
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->loss_mode == vglc_reachedness[i]
+             && VG_(eq_ExeContext) ( SK_(clo_leak_resolution),
+                                     p->allocated_at, 
+                                     where) ) {
+            break;
+	 }
+      }
+      if (p != NULL) {
+         p->num_blocks  ++;
+         p->total_bytes += vglc_shadows[i]->size;
+      } else {
+         n_lossrecords ++;
+         p = VG_(malloc)(sizeof(LossRecord));
+         p->loss_mode    = vglc_reachedness[i];
+         p->allocated_at = where;
+         p->total_bytes  = vglc_shadows[i]->size;
+         p->num_blocks   = 1;
+         p->next         = errlist;
+         errlist         = p;
+      }
+   }
+   
+   /* Print loss records in increasing total_bytes order; a printed
+      (or suppressed) record is retired by zeroing num_blocks. */
+   for (i = 0; i < n_lossrecords; i++) {
+      LossRecord* p_min = NULL;
+      UInt        n_min = 0xFFFFFFFF;
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->num_blocks > 0 && p->total_bytes < n_min) {
+            n_min = p->total_bytes;
+            p_min = p;
+         }
+      }
+      vg_assert(p_min != NULL);
+
+      if ( (!SK_(clo_show_reachable)) && p_min->loss_mode == Proper) {
+         p_min->num_blocks = 0;
+         continue;
+      }
+
+      VG_(message)(Vg_UserMsg, "");
+      VG_(message)(
+         Vg_UserMsg,
+         "%d bytes in %d blocks are %s in loss record %d of %d",
+         p_min->total_bytes, p_min->num_blocks,
+         p_min->loss_mode==Unreached ? "definitely lost" :
+            (p_min->loss_mode==Interior ? "possibly lost"
+                                        : "still reachable"),
+         i+1, n_lossrecords
+      );
+      VG_(pp_ExeContext)(p_min->allocated_at);
+      p_min->num_blocks = 0;
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
+   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+   if (!SK_(clo_show_reachable)) {
+      VG_(message)(Vg_UserMsg, 
+         "Reachable blocks (those to which a pointer was found) are not shown.");
+      VG_(message)(Vg_UserMsg, 
+         "To see them, rerun with: --show-reachable=yes");
+   }
+   VG_(message)(Vg_UserMsg, "");
+
+   /* NOTE(review): the LossRecord list (errlist) is not freed here --
+      presumably tolerated since leak checks run at exit. */
+   VG_(free) ( vglc_shadows );
+   VG_(free) ( vglc_reachedness );
+}
+
+
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+/* Cheap sanity check: the first and last 64KB regions of the address
+   space should still map to distinguished (all-inaccessible)
+   secondaries; if not, something has probably gone badly wrong. */
+Bool SK_(cheap_sanity_check) ( void )
+{
+   if (IS_DISTINGUISHED_SM(primary_map[0]) && 
+       IS_DISTINGUISHED_SM(primary_map[65535]))
+      return True;
+   else
+      return False;
+}
+
+/* Expensive sanity check: verify the shared distinguished secondary
+   is still all-invalid, and that the upper 3GB of the primary map
+   still points at it. */
+Bool SK_(expensive_sanity_check) ( void )
+{
+   Int i;
+
+   /* Make sure nobody changed the distinguished secondary. */
+   for (i = 0; i < 8192; i++)
+      if (distinguished_secondary_map.abits[i] != VGM_BYTE_INVALID)
+         return False;
+
+   /* Make sure that the upper 3/4 of the primary map hasn't
+      been messed with. */
+   for (i = 65536; i < 262144; i++)
+      if (primary_map[i] != & distinguished_secondary_map)
+         return False;
+
+   return True;
+}
+      
+/* ---------------------------------------------------------------------
+   Debugging machinery (turn on to debug).  Something of a mess.
+   ------------------------------------------------------------------ */
+
+#if 0
+/* Print the value tags on the 8 integer registers & flag reg. */
+
+/* Render x as a 32-char binary string, MSB first, with a space after
+   every 8 bits (35 chars + NUL = 36). */
+static void uint_to_bits ( UInt x, Char* str )
+{
+   Int i;
+   Int w = 0;
+   /* str must point to a space of at least 36 bytes. */
+   for (i = 31; i >= 0; i--) {
+      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
+      if (i == 24 || i == 16 || i == 8)
+         str[w++] = ' ';
+   }
+   str[w++] = 0;
+   vg_assert(w == 36);
+}
+
+/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
+   state table. */
+
+/* Print the shadow (tag) values of the 8 integer registers and the
+   flags register, in binary, via uint_to_bits.
+
+   Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
+   state table. */
+static void vg_show_reg_tags ( void )
+{
+   Char buf1[36];
+   Char buf2[36];
+   UInt z_eax, z_ebx, z_ecx, z_edx, 
+        z_esi, z_edi, z_ebp, z_esp, z_eflags;
+
+   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
+   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
+   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
+   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
+   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
+   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
+   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
+   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
+   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   
+   uint_to_bits(z_eflags, buf1);
+   /* BUGFIX: was "efl %\n" -- missing conversion char, so buf1 was
+      never printed. */
+   VG_(message)(Vg_DebugMsg, "efl %s\n", buf1);
+
+   uint_to_bits(z_eax, buf1);
+   uint_to_bits(z_ebx, buf2);
+   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
+
+   uint_to_bits(z_ecx, buf1);
+   uint_to_bits(z_edx, buf2);
+   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
+
+   uint_to_bits(z_esi, buf1);
+   uint_to_bits(z_edi, buf2);
+   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
+
+   uint_to_bits(z_ebp, buf1);
+   uint_to_bits(z_esp, buf2);
+   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
+}
+
+
+/* For debugging only.  Scan the address space and touch all allegedly
+   addressible words.  Useful for establishing where Valgrind's idea of
+   addressibility has diverged from what the kernel believes. */
+
+/* No-op scan callback: we only want the count of addressible words,
+   not the values. */
+static 
+void zzzmemscan_notify_word ( Addr a, UInt w )
+{
+}
+
+/* Debug aid: scan the whole address space, touching every allegedly
+   addressible word, and print how many bytes were reachable. */
+void zzzmemscan ( void )
+{
+   Int n_notifies
+      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
+   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
+}
+#endif
+
+
+
+
+#if 0
+static Int zzz = 0;
+
+/* Debug aid: print a basic-block counter, the register tags, and a
+   disassembly of the block at eip_next.
+   NOTE(review): zzz is printed but never incremented. */
+void show_bb ( Addr eip_next )
+{
+   VG_(printf)("[%4d] ", zzz);
+   /* BUGFIX: was `vg_show_reg_tags( &VG_(m_shadow );` -- unbalanced
+      parenthesis (would not compile if enabled), and the callee
+      takes no arguments. */
+   vg_show_reg_tags();
+   VG_(translate) ( eip_next, NULL, NULL, NULL );
+}
+#endif /* 0 */
+
+/*------------------------------------------------------------*/
+/*--- Syscall wrappers                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Syscall wrapper (before): snapshot the cheap sanity-check result so
+   post_syscall can tell whether the syscall itself broke things.  The
+   Bool is smuggled through the opaque void* return. */
+void* SK_(pre_syscall)  ( ThreadId tid, UInt syscallno, Bool isBlocking )
+{
+   Int sane = SK_(cheap_sanity_check)();
+   return (void*)sane;
+}
+
+/* Syscall wrapper (after): if we were sane before the syscall but not
+   after, the syscall handling has probably corrupted our metadata;
+   give up noisily rather than continue with bad state. */
+void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                           void* pre_result, Int res, Bool isBlocking )
+{
+   /* pre_result is the Bool returned by SK_(pre_syscall). */
+   Int  sane_before_call = (Int)pre_result;
+   Bool sane_after_call  = SK_(cheap_sanity_check)();
+
+   if ((Int)sane_before_call && (!sane_after_call)) {
+      VG_(message)(Vg_DebugMsg, "post-syscall: ");
+      VG_(message)(Vg_DebugMsg,
+                   "probable sanity check failure for syscall number %d\n",
+                   syscallno );
+      VG_(panic)("aborting due to the above ... bye!");
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Command-line option handling                         ---*/
+/*------------------------------------------------------------*/
+
+/* Tell the core what shadow value newly-written registers / eflags
+   should get: everything is "valid" (addrcheck tracks only
+   addressibility, not definedness). */
+void SK_(written_shadow_regs_values)( UInt* gen_reg_value, UInt* eflags_value )
+{
+   *gen_reg_value = VGM_WORD_VALID;
+   *eflags_value  = VGM_EFLAGS_VALID;
+}
+
+/* Parse one skin-specific command-line option into the SK_(clo_*)
+   globals.  Returns True iff the option was recognised.
+   NOTE(review): SK_(usage) advertises --check-addrVs, but no handler
+   for it appears here -- confirm whether that flag is meant to be
+   accepted by this skin. */
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+#  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
+#  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
+
+   if      (STREQ(arg, "--partial-loads-ok=yes"))
+      SK_(clo_partial_loads_ok) = True;
+   else if (STREQ(arg, "--partial-loads-ok=no"))
+      SK_(clo_partial_loads_ok) = False;
+
+   /* 15 == strlen("--freelist-vol="); negative values clamp to 0. */
+   else if (STREQN(15, arg, "--freelist-vol=")) {
+      SK_(clo_freelist_vol) = (Int)VG_(atoll)(&arg[15]);
+      if (SK_(clo_freelist_vol) < 0) SK_(clo_freelist_vol) = 0;
+   }
+
+   else if (STREQ(arg, "--leak-check=yes"))
+      SK_(clo_leak_check) = True;
+   else if (STREQ(arg, "--leak-check=no"))
+      SK_(clo_leak_check) = False;
+
+   else if (STREQ(arg, "--leak-resolution=low"))
+      SK_(clo_leak_resolution) = Vg_LowRes;
+   else if (STREQ(arg, "--leak-resolution=med"))
+      SK_(clo_leak_resolution) = Vg_MedRes;
+   else if (STREQ(arg, "--leak-resolution=high"))
+      SK_(clo_leak_resolution) = Vg_HighRes;
+   
+   else if (STREQ(arg, "--show-reachable=yes"))
+      SK_(clo_show_reachable) = True;
+   else if (STREQ(arg, "--show-reachable=no"))
+      SK_(clo_show_reachable) = False;
+
+   else if (STREQ(arg, "--workaround-gcc296-bugs=yes"))
+      SK_(clo_workaround_gcc296_bugs) = True;
+   else if (STREQ(arg, "--workaround-gcc296-bugs=no"))
+      SK_(clo_workaround_gcc296_bugs) = False;
+
+   else if (STREQ(arg, "--cleanup=yes"))
+      SK_(clo_cleanup) = True;
+   else if (STREQ(arg, "--cleanup=no"))
+      SK_(clo_cleanup) = False;
+
+   else
+      /* Unrecognised; let the core complain. */
+      return False;
+
+   return True;
+
+#undef STREQ
+#undef STREQN
+}
+
+/* Return the skin's usage text (one static string; bracketed values
+   are the documented defaults).
+   NOTE(review): --check-addrVs is listed here but has no case in
+   SK_(process_cmd_line_option) -- verify the flag is really
+   supported by this skin. */
+Char* SK_(usage)(void)
+{  
+   return  
+"    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
+"    --freelist-vol=<number>   volume of freed blocks queue [1000000]\n"
+"    --leak-check=no|yes       search for memory leaks at exit? [no]\n"
+"    --leak-resolution=low|med|high\n"
+"                              amount of bt merging in leak check [low]\n"
+"    --show-reachable=no|yes   show reachable blocks in leak check? [no]\n"
+"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
+"    --check-addrVs=no|yes     experimental lighterweight checking? [yes]\n"
+"                              yes == Valgrind's original behaviour\n"
+"\n"
+"    --cleanup=no|yes          improve after instrumentation? [yes]\n";
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+
+/* Skin initialisation, run before command-line processing: declare
+   the skin's identity and needs, register the compact helpers called
+   from generated code, hook the memory-event trackers, and set up
+   shadow memory and profiling counters. */
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   needs->name                    = "addrcheck";
+   needs->description             = "a fine-grained address checker";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+   needs->run_libc_freeres        = True;
+
+   /* One word of skin_extra per chunk -- used by set_where/get_where. */
+   needs->sizeof_shadow_block     = 1;
+
+   needs->basic_block_discards    = False;
+   needs->shadow_regs             = False;
+   needs->command_line_options    = True;
+   needs->client_requests         = True;
+   needs->extended_UCode          = False;
+   needs->syscall_wrapper         = True;
+   needs->alternative_free        = True;
+   needs->sanity_checks           = True;
+
+   /* Helpers invoked by the code emitted in SK_(instrument). */
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS2));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS1));
+   VG_(register_compact_helper)((Addr) & SK_(fpu_ACCESS_check));
+
+   /* Events to track */
+   track->new_mem_startup       = & addrcheck_new_mem_startup;
+   track->new_mem_heap          = & addrcheck_new_mem_heap;
+   track->new_mem_stack         = & SK_(make_accessible);
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & SK_(make_accessible);
+   track->new_mem_brk           = & SK_(make_accessible);
+   track->new_mem_mmap          = & addrcheck_set_perms;
+   
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->copy_mem_remap        = & copy_address_range_state;
+   track->change_mem_mprotect   = & addrcheck_set_perms;
+      
+   track->ban_mem_heap          = & SK_(make_noaccess);
+   track->ban_mem_stack         = & SK_(make_noaccess);
+
+   track->die_mem_heap          = & SK_(make_noaccess);
+   track->die_mem_stack         = & SK_(make_noaccess);
+   track->die_mem_stack_aligned = & make_noaccess_aligned; 
+   track->die_mem_stack_signal  = & SK_(make_noaccess); 
+   track->die_mem_brk           = & SK_(make_noaccess);
+   track->die_mem_munmap        = & SK_(make_noaccess); 
+
+   track->bad_free              = & SK_(record_free_error);
+   track->mismatched_free       = & SK_(record_freemismatch_error);
+
+   track->pre_mem_read          = & check_is_readable;
+   track->pre_mem_read_asciiz   = & check_is_readable_asciiz;
+   track->pre_mem_write         = & check_is_writable;
+   track->post_mem_write        = & SK_(make_accessible);
+
+   init_shadow_memory();
+
+   init_prof_mem();
+
+   VGP_(register_profile_event) ( VgpSetMem,   "set-mem-perms" );
+   VGP_(register_profile_event) ( VgpCheckMem, "check-mem-perms" );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                           vg_addrcheck.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/cachegrind.in b/cachegrind.in
deleted file mode 100755
index 5c22cb4..0000000
--- a/cachegrind.in
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/sh
-# Should point to the installation directory
-prefix="@prefix@"
-exec_prefix="@exec_prefix@"
-VALGRIND="@libdir@/valgrind"
-
-
-# Other stuff ...
-version="@VERSION@"
-emailto="jseward@acm.org"
-
-# name we were invoked with
-vgname=`echo $0 | sed 's,^.*/,,'`
-
-# Cachegrind options
-vgopts=
-
-# Prog and arg to run
-argopts=
-
-# Show usage info?
-dousage=0
-
-# show version info?
-doversion=0
-
-# Collect up args for Cachegrind
-while [ $+ != 0 ]
-do
-  arg=$1
-  case "$arg" in
-#   options for the user
-    --help)                 dousage=1; break;;
-    --version)              doversion=1; break;;
-    --logfile-fd=*)         vgopts="$vgopts $arg"; shift;;
-    -v)                     vgopts="$vgopts $arg"; shift;;
-    --verbose)              vgopts="$vgopts -v"; shift;;
-    -q)                     vgopts="$vgopts $arg"; shift;;
-    --quiet)                vgopts="$vgopts $arg"; shift;;
-    --demangle=no)          vgopts="$vgopts $arg"; shift;;
-    --demangle=yes)         vgopts="$vgopts $arg"; shift;;
-    --trace-children=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-children=yes)   vgopts="$vgopts $arg"; shift;;
-    --suppressions=*)       vgopts="$vgopts $arg"; shift;;
-    --I1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --D1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --L2=*,*,*)             vgopts="$vgopts $arg"; shift;;
-#   options for debugging Cachegrind
-    --sanity-level=*)       vgopts="$vgopts $arg"; shift;;
-    --single-step=yes)      vgopts="$vgopts $arg"; shift;;
-    --single-step=no)       vgopts="$vgopts $arg"; shift;;
-    --optimise=yes)         vgopts="$vgopts $arg"; shift;;
-    --optimise=no)          vgopts="$vgopts $arg"; shift;;
-    --instrument=yes)       vgopts="$vgopts $arg"; shift;;
-    --instrument=no)        vgopts="$vgopts $arg"; shift;;
-    --cleanup=yes)          vgopts="$vgopts $arg"; shift;;
-    --cleanup=no)           vgopts="$vgopts $arg"; shift;;
-    --smc-check=none)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=some)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=all)        vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=yes)   vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=yes)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=no)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=yes)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=no)       vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=none)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=some)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=all)    vgopts="$vgopts $arg"; shift;;
-    --stop-after=*)         vgopts="$vgopts $arg"; shift;;
-    --dump-error=*)         vgopts="$vgopts $arg"; shift;;
-    -*)                     dousage=1; break;;
-    *)                      break;;
-  esac
-done
-
-if [ z"$doversion" = z1 ]; then
-   echo "cachegrind-$version"
-   exit 1
-fi
-
-if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
-   echo
-   echo "usage: $vgname [options] prog-and-args"
-   echo
-   echo "  options for the user, with defaults in [ ], are:"
-   echo "    --help                    show this message"
-   echo "    --version                 show version"
-   echo "    -q --quiet                run silently; only print error msgs"
-   echo "    -v --verbose              be more verbose, incl counts of errors"
-   echo "    --demangle=no|yes         automatically demangle C++ names? [yes]"
-   echo "    --trace-children=no|yes   Cachegrind-ise child processes? [no]"
-   echo "    --logfile-fd=<number>     file descriptor for messages [2=stderr]"
-   echo "    --suppressions=<filename> is ignored"
-   echo "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually"
-   echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
-   echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
-   echo
-   echo "  options for debugging Cachegrind itself are:"
-   echo "    --sanity-level=<number>   level of sanity checking to do [1]"
-   echo "    --single-step=no|yes      translate each instr separately? [no]"
-   echo "    --optimise=no|yes         improve intermediate code? [yes]"
-   echo "    --instrument=no|yes       actually do memory checks? [yes]"
-   echo "    --cleanup=no|yes          improve after instrumentation? [yes]"
-   echo "    --smc-check=none|some|all check writes for s-m-c? [some]"
-   echo "    --trace-syscalls=no|yes   show all system calls? [no]"
-   echo "    --trace-signals=no|yes    show signal handling details? [no]"
-   echo "    --trace-symtab=no|yes     show symbol table details? [no]"
-   echo "    --trace-malloc=no|yes     show client malloc details? [no]"
-   echo "    --trace-sched=no|yes      show thread scheduler details? [no]"
-   echo "    --trace-pthread=none|some|all  show pthread event details? [no]"
-   echo "    --stop-after=<number>     switch to real CPU after executing"
-   echo "                              <number> basic blocks [infinity]"
-   echo "    --dump-error=<number>     show translation for basic block"
-   echo "                              associated with <number>'th"
-   echo "                              error context [0=don't show any]"
-   echo
-   echo "  Extra options are read from env variable \$CACHEGRIND_OPTS"
-   echo
-   echo "  Valgrind is Copyright (C) 2000-2002 Julian Seward"
-   echo "  and licensed under the GNU General Public License, version 2."
-   echo "  Bug reports, feedback, admiration, abuse, etc, to: $emailto."
-   echo
-   exit 1
-fi
-
-# A bit subtle.  The LD_PRELOAD added entry must be absolute
-# and not depend on LD_LIBRARY_PATH.  This is so that we can
-# mess with LD_LIBRARY_PATH for child processes, which makes
-# libpthread.so fall out of visibility, independently of
-# whether valgrind.so is visible.
-
-VG_ARGS="$CACHEGRIND_OPTS $vgsupp $vgopts --cachesim=yes"
-export VG_ARGS
-LD_LIBRARY_PATH=$VALGRIND:$LD_LIBRARY_PATH
-export LD_LIBRARY_PATH
-LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD
-export LD_PRELOAD
-#LD_DEBUG=files
-#LD_DEBUG=symbols
-#export LD_DEBUG
-exec "$@"
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am
index 60553dd..96911ed 100644
--- a/cachegrind/Makefile.am
+++ b/cachegrind/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/cachegrind/cg_annotate.in b/cachegrind/cg_annotate.in
index 1182190..4fd28eb 100644
--- a/cachegrind/cg_annotate.in
+++ b/cachegrind/cg_annotate.in
@@ -26,7 +26,7 @@
 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 #  02111-1307, USA.
 #
-#  The GNU General Public License is contained in the file LICENSE.
+#  The GNU General Public License is contained in the file COPYING.
 
 #----------------------------------------------------------------------------
 # Annotator for cachegrind. 
@@ -134,14 +134,14 @@
 my @include_dirs = ("");
 
 # Input file name
-my $input_file = "cachegrind.out";
+my $input_file = undef;
 
 # Version number
 my $version = "@VERSION@";
 
 # Usage message.
 my $usage = <<END
-usage: vg_annotate [options] [source-files]
+usage: vg_annotate [options] --<pid> [source-files]
 
   options for the user, with defaults in [ ], are:
     -h --help             show this message
@@ -223,12 +223,20 @@
                 $inc =~ s|/$||;         # trim trailing '/'
                 push(@include_dirs, "$inc/");
 
+            } elsif ($arg =~ /^--(\d+)$/) {
+                my $pid = $1;
+                if (not defined $input_file) {
+                    $input_file = "cachegrind.out.$pid";
+                } else {
+                    die("One cachegrind.out.<pid> file at a time, please\n");
+                }
+
             } else {            # -h and --help fall under this case
                 die($usage);
             }
 
         # Argument handling -- annotation file checking and selection.
-        # Stick filenames into a hash for quick 'n easy lookup throughout
+        # Stick filenames into a hash for quick 'n easy lookup throughout.
         } else {
             my $readable = 0;
             foreach my $include_dir (@include_dirs) {
@@ -238,7 +246,12 @@
             }
             $readable or die("File $arg not found in any of: @include_dirs\n");
             $user_ann_files{$arg} = 1;
-        } 
+        }
+    }
+
+    # Must have chosen an input file
+    if (not defined $input_file) {
+        die($usage);
     }
 }
 
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 05f4186..b21815e 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
-/*--- The cache simulation framework: instrumentation, recording   ---*/
-/*--- and results printing.                                        ---*/
+/*--- The cache simulation skin: cache detection; instrumentation, ---*/
+/*--- recording and results printing.                              ---*/
 /*---                                                vg_cachesim.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -27,19 +27,32 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
-#include "vg_include.h"
+#include "vg_skin.h"
+//#include "vg_profile.c"
+
+/* For cache simulation */
+typedef struct {
+    int size;       /* bytes */ 
+    int assoc;
+    int line_size;  /* bytes */ 
+} cache_t;
 
 #include "vg_cachesim_L2.c"
 #include "vg_cachesim_I1.c"
 #include "vg_cachesim_D1.c"
 
+/*------------------------------------------------------------*/
+/*--- Constants                                            ---*/
+/*------------------------------------------------------------*/
 
 /* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
 #define MAX_x86_INSTR_SIZE              16
 
+#define MIN_LINE_SIZE   16
+
 /* Size of various buffers used for storing strings */
 #define FILENAME_LEN                    256
 #define FN_NAME_LEN                     256
@@ -48,33 +61,29 @@
 #define RESULTS_BUF_LEN                 128
 #define LINE_BUF_LEN                     64
 
-
 /*------------------------------------------------------------*/
-/*--- Generic utility stuff                                ---*/
+/*--- Profiling events                                     ---*/
 /*------------------------------------------------------------*/
 
-Int VG_(log2) ( Int x ) 
-{
-   Int i;
-   /* Any more than 32 and we overflow anyway... */
-   for (i = 0; i < 32; i++) {
-      if (1 << i == x) return i;
-   }
-   return -1;
-}
-
+typedef 
+   enum { 
+      VgpGetBBCC = VgpFini+1,
+      VgpCacheSimulate,
+      VgpCacheResults
+   } 
+   VgpSkinCC;
 
 /*------------------------------------------------------------*/
 /*--- Output file related stuff                            ---*/
 /*------------------------------------------------------------*/
 
-#define OUT_FILE        "cachegrind.out"
+Char cachegrind_out_file[FILENAME_LEN];
 
 static void file_err()
 {
    VG_(message)(Vg_UserMsg,
                 "error: can't open cache simulation output file `%s'",
-                OUT_FILE );
+                cachegrind_out_file );
    VG_(exit)(1);
 }
 
@@ -95,7 +104,15 @@
     cc->m2 = 0;
 }
 
-typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
+typedef 
+   enum {
+      InstrCC,         /* eg. mov %eax,   %ebx                      */
+      ReadCC,          /* eg. mov (%ecx), %esi                      */
+      WriteCC,         /* eg. mov %eax,   (%edx)                    */
+      ModCC,           /* eg. incl (%eax) (read+write one addr)     */
+      ReadWriteCC,     /* eg. call*l (%esi), pushl 0x4(%ebx), movsw 
+                               (read+write two different addrs)      */
+   } CC_type;
 
 /* Instruction-level cost-centres.  The typedefs for these structs are in
  * vg_include.c 
@@ -104,33 +121,53 @@
  *
  * This is because we use it to work out what kind of CC we're dealing with.
  */ 
-struct _iCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   /* 2 bytes padding */
+typedef
+   struct {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      /* 2 bytes padding */
 
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-};
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+   }
+   iCC;
 
-struct _idCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   UChar data_size;
-   /* 1 byte padding */
+typedef
+   struct _idCC {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      UChar data_size;
+      /* 1 byte padding */
 
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-   CC D;
-};
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+      CC D;
+   }
+   idCC;
+
+typedef
+   struct _iddCC {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      UChar data_size;
+      /* 1 byte padding */
+
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+      CC Da;
+      CC Db;
+   }
+   iddCC;
 
 static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
 {
-   cc->tag        = INSTR_CC;
+   cc->tag        = InstrCC;
    cc->instr_size = instr_size;
    cc->instr_addr = instr_addr;
    initCC(&cc->I);
@@ -147,6 +184,18 @@
    initCC(&cc->D);
 }
 
+static void init_iddCC(iddCC* cc, Addr instr_addr,
+                       UInt instr_size, UInt data_size)
+{
+   cc->tag        = ReadWriteCC;
+   cc->instr_size = instr_size;
+   cc->data_size  = data_size;
+   cc->instr_addr = instr_addr;
+   initCC(&cc->I);
+   initCC(&cc->Da);
+   initCC(&cc->Db);
+}
+
 #define ADD_CC_TO(CC_type, cc, total)           \
    total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
    total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
@@ -193,6 +242,22 @@
 #endif
 }
 
+static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
+{
+#if PRINT_INSTR_ADDRS
+   /* Nine ULong counters (I, Da, Db) plus the instruction address. */
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu # %x\n",
+                      cc->I.a,  cc->I.m1,  cc->I.m2, 
+                      cc->Da.a, cc->Da.m1, cc->Da.m2,
+                      cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
+#else
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+                      cc->I.a,  cc->I.m1,  cc->I.m2, 
+                      cc->Da.a, cc->Da.m1, cc->Da.m2,
+                      cc->Db.a, cc->Db.m1, cc->Db.m2);
+#endif
+}
+
+
 /*------------------------------------------------------------*/
 /*--- BBCC hash table stuff                                ---*/
 /*------------------------------------------------------------*/
@@ -257,11 +322,11 @@
 static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
                            Char fn_name[FN_NAME_LEN], Int* line_num)
 {
-   Bool found1, found2, no_demangle = False;
+   Bool found1, found2;
 
-   found1 = VG_(what_line_is_this)(instr_addr, filename,
-                                   FILENAME_LEN, line_num);
-   found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
+   found1 = VG_(get_filename_linenum)(instr_addr, filename,
+                                      FILENAME_LEN, line_num);
+   found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
 
    if (!found1 && !found2) {
       no_debug_BBs++;
@@ -290,8 +355,8 @@
 file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
 {
    Int i;
-   file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
-   new->filename  = VG_(strdup)(VG_AR_PRIVATE, filename);
+   file_node* new = VG_(malloc)(sizeof(file_node));
+   new->filename  = VG_(strdup)(filename);
    for (i = 0; i < N_FN_ENTRIES; i++) {
       new->fns[i] = NULL;
    }
@@ -303,8 +368,8 @@
 fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
 {
    Int i;
-   fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
-   new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
+   fn_node* new = VG_(malloc)(sizeof(fn_node));
+   new->fn_name = VG_(strdup)(fn_name);
    for (i = 0; i < N_BBCC_ENTRIES; i++) {
       new->BBCCs[i] = NULL;
    }
@@ -318,7 +383,7 @@
    Int BBCC_array_size = compute_BBCC_array_size(cb);
    BBCC* new;
 
-   new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
+   new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
    new->orig_addr  = bb_orig_addr;
    new->array_size = BBCC_array_size;
    new->next = next;
@@ -352,7 +417,7 @@
 
    get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
 
-   VGP_PUSHCC(VgpCacheGetBBCC);
+   VGP_PUSHCC(VgpGetBBCC);
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
@@ -410,7 +475,7 @@
           BB_retranslations++;
       }
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpGetBBCC);
    return curr_BBCC;
 }
 
@@ -418,11 +483,12 @@
 /*--- Cache simulation instrumentation phase               ---*/
 /*------------------------------------------------------------*/
 
+// SSS: do something about all these...
 #define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
 #define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
 #define newTemp   VG_(getNewTemp)
 
 static Int compute_BBCC_array_size(UCodeBlock* cb)
@@ -430,12 +496,12 @@
    UInstr* u_in;
    Int     i, CC_size, BBCC_size = 0;
    Bool    is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
+   Int     t_read, t_write;
     
    is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
+   t_read = t_write = INVALID_TEMPREG;
 
    for (i = 0; i < cb->used; i++) {
-      /* VG_(ppUInstr)(0, &cb->instrs[i]); */
-
       u_in = &cb->instrs[i];
       switch(u_in->opcode) {
 
@@ -449,8 +515,13 @@
 
             case_for_end_of_instr:
 
-            CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W 
-                      ? sizeof(idCC) : sizeof(iCC));
+            if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) && 
+                 t_read != t_write)
+               CC_size = sizeof(iddCC);
+            else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
+               CC_size = sizeof(idCC);
+            else
+               CC_size = sizeof(iCC);
 
             BBCC_size += CC_size;
             is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
@@ -461,22 +532,26 @@
             /* Also, a STORE can come after a LOAD for bts/btr/btc */
             vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */ 
                       !is_FPU_R && !is_FPU_W);
+            t_read = u_in->val1;
             is_LOAD = True;
             break;
 
          case STORE:
             /* Multiple STOREs are possible for 'pushal' */
             vg_assert(            /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
+            t_write = u_in->val2;
             is_STORE = True;
             break;
 
          case FPU_R:
             vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+            t_read = u_in->val2;
             is_FPU_R = True;
             break;
 
          case FPU_W:
             vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+            t_write = u_in->val2;
             is_FPU_W = True;
             break;
 
@@ -488,41 +563,153 @@
    return BBCC_size;
 }
 
-/* Use this rather than eg. -1 because it's stored as a UInt. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access(iCC* cc)
+{
+   //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+   cc->I.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+/* Difference between this function and log_1I_0D_cache_access() is that
+   this one can be passed any kind of CC, not just an iCC.  So we have to
+   be careful to make sure we don't make any assumptions about CC layout.
+   (As it stands, they would be safe, but this will avoid potential heartache
+   if anyone else changes CC layout.)  
+   Note that we only do the switch for the JIFZ version because if we always
+   called this switching version, things would run about 5% slower. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access_JIFZ(iCC* cc)
+{
+   UChar instr_size;
+   Addr instr_addr;
+   CC* I;
+
+   //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+
+   switch(cc->tag) {
+       case InstrCC:
+           instr_size = cc->instr_size;
+           instr_addr = cc->instr_addr;
+           I = &(cc->I);
+           break;
+       case ReadCC:
+       case WriteCC:
+       case ModCC:
+           instr_size = ((idCC*)cc)->instr_size;
+           instr_addr = ((idCC*)cc)->instr_addr;
+           I = &( ((idCC*)cc)->I );
+           break;
+       case ReadWriteCC:
+           instr_size = ((iddCC*)cc)->instr_size;
+           instr_addr = ((iddCC*)cc)->instr_addr;
+           I = &( ((iddCC*)cc)->I );
+           break;
+       default:
+           VG_(panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
+           break;
+   }
+   cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
+   I->a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static 
+void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
+{
+   //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
+   cc->D.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static
+void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
+{
+   //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+   cc->I.a++;
+
+   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
+   cc->D.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static 
+void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+{
+   //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_D1_doref(data_addr1, cc->data_size,  &cc->Da.m1, &cc->Da.m2);
+   cc->Da.a++;
+   cachesim_D1_doref(data_addr2, cc->data_size,  &cc->Db.m1, &cc->Db.m2);
+   cc->Db.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static
+void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+{
+   //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1,  &cc->I.m2);
+   cc->I.a++;
+
+   cachesim_D1_doref(data_addr1,     cc->data_size,  &cc->Da.m1, &cc->Da.m2);
+   cc->Da.a++;
+   cachesim_D1_doref(data_addr2,     cc->data_size,  &cc->Db.m1, &cc->Db.m2);
+   cc->Db.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+/* Use this rather than eg. -1 because it's a UInt. */
 #define INVALID_DATA_SIZE   999999
 
-UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
-{
    UCodeBlock* cb;
    Int         i;
    UInstr*     u_in;
    BBCC*       BBCC_node;
-   Int         t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
+   Int         t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
+               t_data_addr2, t_read, t_write;
    Int         CC_size = -1;    /* Shut gcc warnings up */
-   Addr        instr_addr = orig_addr;
-   UInt        instr_size, data_size = INVALID_DATA_SIZE;
-   Int         helper = -1;     /* Shut gcc warnings up */
+   Addr        x86_instr_addr = orig_addr;
+   UInt        x86_instr_size, data_size = INVALID_DATA_SIZE;
+   Addr        helper;
+   Int         argc;
    UInt        stack_used;
-   Bool        BB_seen_before       = False;
-   Bool        prev_instr_was_Jcond = False;
+   Bool        BB_seen_before     = False;
+   Bool        instrumented_Jcond = False;
+   Bool        has_rep_prefix     = False;
    Addr        BBCC_ptr0, BBCC_ptr; 
 
    /* Get BBCC (creating if necessary -- requires a counting pass over the BB
     * if it's the first time it's been seen), and point to start of the 
     * BBCC array.  */
-   BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
+   BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    cb = VG_(allocCodeBlock)();
    cb->nextTemp = cb_in->nextTemp;
 
-   t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
+   t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
+               t_read = t_write = INVALID_TEMPREG;
 
    for (i = 0; i < cb_in->used; i++) {
       u_in = &cb_in->instrs[i];
 
-      //VG_(ppUInstr)(0, u_in);
-
       /* What this is all about:  we want to instrument each x86 instruction 
        * translation.  The end of these are marked in three ways.  The three
        * ways, and the way we instrument them, are as follows:
@@ -531,144 +718,33 @@
        * 2. UCode, Juncond        --> UCode, Instrumentation, Juncond
        * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
        *
-       * We must put the instrumentation before the jumps so that it is always
+       * The last UInstr in a basic block is always a Juncond.  Jconds,
+       * when they appear, are always second last.  We check this with 
+       * various assertions.
+       *
+       * We must put the instrumentation before any jumps so that it is always
        * executed.  We don't have to put the instrumentation before the INCEIP
        * (it could go after) but we do so for consistency.
        *
-       * Junconds are always the last instruction in a basic block.  Jconds are
-       * always the 2nd last, and must be followed by a Jcond.  We check this
-       * with various assertions.
+       * x86 instruction sizes are obtained from INCEIPs (for case 1) or
+       * from .extra4b field of the final JMP (for case 2 & 3).
        *
-       * Note that in VG_(disBB) we patched the `extra4b' field of the first
-       * occurring JMP in a block with the size of its x86 instruction.  This
-       * is used now.
-       *
-       * Note that we don't have to treat JIFZ specially;  unlike JMPs, JIFZ
-       * occurs in the middle of a BB and gets an INCEIP after it.
+       * Note that JIFZ is treated differently.
        *
        * The instrumentation is just a call to the appropriate helper function,
        * passing it the address of the instruction's CC.
        */
-      if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
+      if (instrumented_Jcond) vg_assert(u_in->opcode == JMP);
 
       switch (u_in->opcode) {
-
-         case INCEIP:
-            instr_size = u_in->val1;
-            goto case_for_end_of_x86_instr;
-
-         case JMP:
-            if (u_in->cond == CondAlways) {
-               vg_assert(i+1 == cb_in->used); 
-
-               /* Don't instrument if previous instr was a Jcond. */
-               if (prev_instr_was_Jcond) {
-                  vg_assert(0 == u_in->extra4b);
-                  VG_(copyUInstr)(cb, u_in);
-                  break;
-               }
-               prev_instr_was_Jcond = False;
-
-            } else {
-               vg_assert(i+2 == cb_in->used);  /* 2nd last instr in block */
-               prev_instr_was_Jcond = True;
-            }
-
-            /* Ah, the first JMP... instrument, please. */
-            instr_size = u_in->extra4b;
-            goto case_for_end_of_x86_instr;
-
-            /* Shared code that is executed at the end of an x86 translation
-             * block, marked by either an INCEIP or an unconditional JMP. */
-            case_for_end_of_x86_instr:
-
-#define IS_(X)      (INVALID_TEMPREG != t_##X##_addr)
-             
-            /* Initialise the CC in the BBCC array appropriately if it hasn't
-             * been initialised before.
-             * Then call appropriate sim function, passing it the CC address.
-             * Note that CALLM_S/CALL_E aren't required here;  by this point,
-             * the checking related to them has already happened. */
-            stack_used = 0;
-
-            vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
-            vg_assert(0 != instr_addr);
-
-            if (!IS_(read) && !IS_(write)) {
-               iCC* CC_ptr = (iCC*)(BBCC_ptr);
-               vg_assert(INVALID_DATA_SIZE == data_size);
-               vg_assert(INVALID_TEMPREG == t_read_addr && 
-                         INVALID_TEMPREG == t_write_addr);
-               CC_size = sizeof(iCC);
-               if (!BB_seen_before)
-                   init_iCC(CC_ptr, instr_addr, instr_size);
-
-               /* 1st arg: CC addr */
-               t_CC_addr = newTemp(cb);
-               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
-               uLiteral(cb, BBCC_ptr);
-
-               uInstr1(cb, CCALL_1_0, 0, TempReg, t_CC_addr);
-               uLiteral(cb, VGOFF_(cachesim_log_non_mem_instr));
-
-            } else { 
-               CC_type X_CC;
-               idCC* CC_ptr = (idCC*)(BBCC_ptr);
-                
-               vg_assert(4 == data_size || 2  == data_size || 1 == data_size || 
-                         8 == data_size || 10 == data_size);
-               
-               CC_size = sizeof(idCC);
-               helper = VGOFF_(cachesim_log_mem_instr);
-
-               if (IS_(read) && !IS_(write)) {
-                  X_CC = READ_CC;
-                  vg_assert(INVALID_TEMPREG != t_read_addr && 
-                            INVALID_TEMPREG == t_write_addr);
-                  t_data_addr = t_read_addr;
-
-               } else if (!IS_(read) && IS_(write)) {
-                  X_CC = WRITE_CC;
-                  vg_assert(INVALID_TEMPREG == t_read_addr && 
-                            INVALID_TEMPREG != t_write_addr);
-                  t_data_addr = t_write_addr;
-
-               } else {
-                  vg_assert(IS_(read) && IS_(write));
-                  X_CC = MOD_CC;
-                  vg_assert(INVALID_TEMPREG != t_read_addr && 
-                            INVALID_TEMPREG != t_write_addr);
-                  t_data_addr = t_read_addr;
-               }
-#undef IS_
-               if (!BB_seen_before)
-                  init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
-
-               /* 1st arg: CC addr */
-               t_CC_addr = newTemp(cb);
-               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
-               uLiteral(cb, BBCC_ptr);
-
-               uInstr2(cb, CCALL_2_0, 0, TempReg, t_CC_addr, 
-                                         TempReg, t_data_addr);
-               uLiteral(cb, VGOFF_(cachesim_log_mem_instr));
-            }
-
-            VG_(copyUInstr)(cb, u_in);
-
-            /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
-            BBCC_ptr   += CC_size; 
-            instr_addr += instr_size;
-            t_CC_addr = t_read_addr = t_write_addr = 
-                                      t_data_addr  = INVALID_TEMPREG;
-            data_size = INVALID_DATA_SIZE;
+         case NOP:  case CALLM_E:  case CALLM_S:
             break;
 
-
          /* For memory-ref instrs, copy the data_addr into a temporary to be
-          * passed to the cachesim_log_function at the end of the instruction.
+          * passed to the cachesim_* helper at the end of the instruction.
           */
          case LOAD: 
+            t_read      = u_in->val1;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val1,  TempReg, t_read_addr);
             data_size = u_in->size;
@@ -676,26 +752,216 @@
             break;
 
          case FPU_R:
+            t_read      = u_in->val2;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val2,  TempReg, t_read_addr);
-            data_size = u_in->size;
+            data_size = ( u_in->size <= MIN_LINE_SIZE
+                        ? u_in->size
+                        : MIN_LINE_SIZE);
             VG_(copyUInstr)(cb, u_in);
             break;
 
          /* Note that we must set t_write_addr even for mod instructions;
-          * that's how the code above determines whether it does a write;
-          * without it, it would think a mod instruction is a read.
+          * That's how the code above determines whether it does a write.
+          * Without it, it would think a mod instruction is a read.
           * As for the MOV, if it's a mod instruction it's redundant, but it's
           * not expensive and mod instructions are rare anyway. */
          case STORE:
          case FPU_W:
+            t_write      = u_in->val2;
             t_write_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
-            data_size = u_in->size;
+            /* 28 and 108 B data-sized instructions will be done
+             * inaccurately but they're very rare and this avoids errors
+             * from hitting more than two cache lines in the simulation. */
+            data_size = ( u_in->size <= MIN_LINE_SIZE
+                        ? u_in->size
+                        : MIN_LINE_SIZE);
             VG_(copyUInstr)(cb, u_in);
             break;
 
-         case NOP:  case CALLM_E:  case CALLM_S:
+
+         /* For rep-prefixed instructions, log a single I-cache access
+          * before the UCode loop that implements the repeated part, which
+          * is where the multiple D-cache accesses are logged. */
+         case JIFZ:
+            has_rep_prefix = True;
+
+            /* Setup 1st and only arg: CC addr */
+            t_CC_addr = newTemp(cb);
+            uInstr2(cb, MOV,  4, Literal, 0, TempReg, t_CC_addr);
+            uLiteral(cb, BBCC_ptr);
+
+            /* Call helper */
+            uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+            uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+
+         /* INCEIP: insert instrumentation */
+         case INCEIP:
+            x86_instr_size = u_in->val1;
+            goto instrument_x86_instr;
+
+         /* JMP: insert instrumentation if the first JMP */
+         case JMP:
+            if (instrumented_Jcond) {
+               vg_assert(CondAlways == u_in->cond);
+               vg_assert(i+1 == cb_in->used);
+               VG_(copyUInstr)(cb, u_in);
+               instrumented_Jcond = False;    /* reset */
+               break;
+            }
+            /* The first JMP... instrument. */
+            if (CondAlways != u_in->cond) {
+               vg_assert(i+2 == cb_in->used);
+               instrumented_Jcond = True;
+            } else {
+               vg_assert(i+1 == cb_in->used);
+            }
+
+            /* Get x86 instr size from final JMP. */
+            x86_instr_size = LAST_UINSTR(cb_in).extra4b;
+            goto instrument_x86_instr;
+
+
+            /* Code executed at the end of each x86 instruction. */
+            instrument_x86_instr:
+             
+            /* Initialise the CC in the BBCC array appropriately if it
+             * hasn't been initialised before.  Then call appropriate sim
+             * function, passing it the CC address. */
+            stack_used = 0;
+
+            vg_assert(x86_instr_size >= 1 && 
+                      x86_instr_size <= MAX_x86_INSTR_SIZE);
+
+#define IS_(X)      (INVALID_TEMPREG != t_##X##_addr)
+
+            if (!IS_(read) && !IS_(write)) {
+               vg_assert(INVALID_DATA_SIZE == data_size);
+               vg_assert(INVALID_TEMPREG == t_read_addr  && 
+                         INVALID_TEMPREG == t_read       && 
+                         INVALID_TEMPREG == t_write_addr &&
+                         INVALID_TEMPREG == t_write);
+               CC_size = sizeof(iCC);
+               if (!BB_seen_before)
+                   init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
+               helper = ( has_rep_prefix 
+                        ? (Addr)0      /* no extra log needed */
+                        : (Addr) & log_1I_0D_cache_access
+                        );
+               argc = 1;
+
+            } else { 
+               vg_assert(4 == data_size || 2  == data_size || 1 == data_size || 
+                         8 == data_size || 10 == data_size ||
+                         MIN_LINE_SIZE == data_size);
+               
+               if (IS_(read) && !IS_(write)) {
+                  CC_size = sizeof(idCC);
+                  /* If it uses 'rep', we've already logged the I-cache 
+                   * access at the JIFZ UInstr (see JIFZ case below) so
+                   * don't do it here */
+                  helper = ( has_rep_prefix 
+                           ? (Addr) & log_0I_1D_cache_access
+                           : (Addr) & log_1I_1D_cache_access
+                           );
+                  argc = 2;
+                  if (!BB_seen_before)
+                     init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                               x86_instr_size, data_size);
+                  vg_assert(INVALID_TEMPREG != t_read_addr  && 
+                            INVALID_TEMPREG != t_read       && 
+                            INVALID_TEMPREG == t_write_addr &&
+                            INVALID_TEMPREG == t_write);
+                  t_data_addr1 = t_read_addr;
+
+               } else if (!IS_(read) && IS_(write)) {
+                  CC_size = sizeof(idCC);
+                  helper = ( has_rep_prefix 
+                           ? (Addr) & log_0I_1D_cache_access
+                           : (Addr) & log_1I_1D_cache_access
+                           );
+                  argc = 2;
+                  if (!BB_seen_before)
+                     init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                               x86_instr_size, data_size);
+                  vg_assert(INVALID_TEMPREG == t_read_addr  && 
+                            INVALID_TEMPREG == t_read       && 
+                            INVALID_TEMPREG != t_write_addr &&
+                            INVALID_TEMPREG != t_write);
+                  t_data_addr1 = t_write_addr;
+
+               } else {
+                  vg_assert(IS_(read) && IS_(write));
+                  vg_assert(INVALID_TEMPREG != t_read_addr  && 
+                            INVALID_TEMPREG != t_read       && 
+                            INVALID_TEMPREG != t_write_addr &&
+                            INVALID_TEMPREG != t_write);
+                  if (t_read == t_write) {
+                     CC_size = sizeof(idCC);
+                     helper = ( has_rep_prefix 
+                              ? (Addr) & log_0I_1D_cache_access
+                              : (Addr) & log_1I_1D_cache_access
+                              );
+                     argc = 2;
+                     if (!BB_seen_before)
+                        init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                                  x86_instr_size, data_size);
+                     t_data_addr1 = t_read_addr;
+                  } else {
+                     CC_size = sizeof(iddCC);
+                     helper = ( has_rep_prefix 
+                              ? (Addr) & log_0I_2D_cache_access
+                              : (Addr) & log_1I_2D_cache_access
+                              );
+                     argc = 3;
+                     if (!BB_seen_before)
+                        init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
+                                    x86_instr_size, data_size);
+                     t_data_addr1 = t_read_addr;
+                     t_data_addr2 = t_write_addr;
+                  }
+               }
+#undef IS_
+            }
+
+            /* Call the helper, if necessary */
+            if ((Addr)0 != helper) {
+
+               /* Setup 1st arg: CC addr */
+               t_CC_addr = newTemp(cb);
+               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
+               uLiteral(cb, BBCC_ptr);
+
+               /* Call the helper */
+               if      (1 == argc)
+                  uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+               else if (2 == argc)
+                  uInstr2(cb, CCALL, 0, TempReg, t_CC_addr, 
+                                        TempReg, t_data_addr1);
+               else if (3 == argc)
+                  uInstr3(cb, CCALL, 0, TempReg, t_CC_addr, 
+                                        TempReg, t_data_addr1,
+                                        TempReg, t_data_addr2);
+               else
+                  VG_(panic)("argc... not 1 or 2 or 3?");
+               
+               uCCall(cb, helper, argc, argc, False);
+            }
+
+            /* Copy original UInstr (INCEIP or JMP) */
+            VG_(copyUInstr)(cb, u_in);
+
+            /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
+            BBCC_ptr       += CC_size; 
+            x86_instr_addr += x86_instr_size;
+            t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = 
+                        t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
+            data_size = INVALID_DATA_SIZE;
+            has_rep_prefix = False; 
             break;
 
          default:
@@ -709,19 +975,25 @@
 
    VG_(freeCodeBlock)(cb_in);
    return cb;
+
+#undef INVALID_DATA_SIZE
 }
 
 /*------------------------------------------------------------*/
-/*--- Cache simulation stuff                               ---*/
+/*--- Automagic cache initialisation stuff                 ---*/
 /*------------------------------------------------------------*/
 
-#define MIN_LINE_SIZE   16
-
 /* Total reads/writes/misses.  Calculated during CC traversal at the end. */
 static CC Ir_total;
 static CC Dr_total;
 static CC Dw_total;
 
+#define UNDEFINED_CACHE     ((cache_t) { -1, -1, -1 }) 
+
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
 /* All CPUID info taken from sandpile.org/a32/cpuid.htm */
 /* Probably only works for Intel and AMD chips, and probably only for some of
  * them. 
@@ -739,7 +1011,7 @@
 static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
 {
     VG_(message)(Vg_DebugMsg, 
-       "warning: Pentium with %d K micro_op instruction trace cache", 
+       "warning: Pentium with %d K micro-op instruction trace cache", 
        actual_size);
     VG_(message)(Vg_DebugMsg, 
        "         Simulating a %d KB cache with %d B lines", 
@@ -755,6 +1027,7 @@
 {
    UChar info[16];
    Int   i, trials;
+   Bool  L2_found = False;
 
    if (level < 2) {
       VG_(message)(Vg_DebugMsg, 
@@ -782,8 +1055,9 @@
       case 0x0:       /* ignore zeros */
           break;
           
-      case 0x01: case 0x02: case 0x03: case 0x04:     /* TLB info, ignore */
-      case 0x90: case 0x96: case 0x9b:
+      /* TLB info, ignore */
+      case 0x01: case 0x02: case 0x03: case 0x04:
+      case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
           break;      
 
       case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
@@ -792,22 +1066,35 @@
       case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
       case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
 
+      /* IA-64 info -- panic! */
+      case 0x10: case 0x15: case 0x1a: 
+      case 0x88: case 0x89: case 0x8a: case 0x8d:
+      case 0x90: case 0x96: case 0x9b:
+         VG_(message)(Vg_DebugMsg,
+            "error: IA-64 cache stats!  Cachegrind doesn't run on IA-64...");
+         VG_(panic)("IA-64 detected");
+
       case 0x22: case 0x23: case 0x25: case 0x29: 
-      case 0x88: case 0x89: case 0x8a:
           VG_(message)(Vg_DebugMsg, 
              "warning: L3 cache detected but ignored\n");
           break;
 
-      case 0x40: 
-          VG_(message)(Vg_DebugMsg, 
-             "warning: L2 cache not installed, ignore L2 results.");
+      /* These are sectored, whatever that means */
+      case 0x39: *L2c = (cache_t) {  128, 4, 64 }; L2_found = True; break;
+      case 0x3c: *L2c = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+
+      /* If a P6 core, this means "no L2 cache".  
+         If a P4 core, this means "no L3 cache".
+         We don't know what core it is, so don't issue a warning.  To detect
+         a missing L2 cache, we use 'L2_found'. */
+      case 0x40:
           break;
 
-      case 0x41: *L2c = (cache_t) {  128, 4, 32 };    break;
-      case 0x42: *L2c = (cache_t) {  256, 4, 32 };    break;
-      case 0x43: *L2c = (cache_t) {  512, 4, 32 };    break;
-      case 0x44: *L2c = (cache_t) { 1024, 4, 32 };    break;
-      case 0x45: *L2c = (cache_t) { 2048, 4, 32 };    break;
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; L2_found = True; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; L2_found = True; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; L2_found = True; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
 
       /* These are sectored, whatever that means */
       case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
@@ -832,24 +1119,31 @@
          micro_ops_warn(32, 32, 32); 
          break;  
 
-      case 0x79: *L2c = (cache_t) {  128, 8, 64 };    break;  /* sectored */
-      case 0x7a: *L2c = (cache_t) {  256, 8, 64 };    break;  /* sectored */
-      case 0x7b: *L2c = (cache_t) {  512, 8, 64 };    break;  /* sectored */
-      case 0x7c: *L2c = (cache_t) { 1024, 8, 64 };    break;  /* sectored */
+      /* These are sectored, whatever that means */
+      case 0x79: *L2c = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
+      case 0x7a: *L2c = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
+      case 0x7b: *L2c = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
+      case 0x7c: *L2c = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x7e: *L2c = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
 
-      case 0x81: *L2c = (cache_t) {  128, 8, 32 };    break;
-      case 0x82: *L2c = (cache_t) {  256, 8, 32 };    break;
-      case 0x83: *L2c = (cache_t) {  512, 8, 32 };    break;
-      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };    break;
-      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };    break;
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  L2_found = True;  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  L2_found = True;  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  L2_found = True;  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  L2_found = True;  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  L2_found = True;  break;
 
       default:
           VG_(message)(Vg_DebugMsg, 
              "warning: Unknown Intel cache config value "
-             "(0x%x), ignoring\n", info[i]);
+             "(0x%x), ignoring", info[i]);
           break;
       }
    }
+
+   if (!L2_found)
+      VG_(message)(Vg_DebugMsg, 
+         "warning: L2 cache not installed, ignore L2 results.");
+
    return 0;
 }
 
@@ -871,12 +1165,16 @@
  * #3  The AMD K7 processor's L2 cache must be configured prior to relying 
  *     upon this information. (Whatever that means -- njn)
  *
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ * 
  * Returns 0 on success, non-zero on failure.
  */
 static
 Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
 {
-   Int dummy, ext_level;
+   Int dummy, model, ext_level;
    Int I1i, D1i, L2i;
    
    cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
@@ -891,6 +1189,16 @@
    cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
    cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
 
+   cpuid(0x1, &model, &dummy, &dummy, &dummy);
+   /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
+
+   /* Check for Duron bug */
+   if (model == 0x630) {
+      VG_(message)(Vg_UserMsg,
+         "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+      L2i = (64 << 16) | (L2i & 0xffff);
+   }
+
    D1c->size      = (D1i >> 24) & 0xff;
    D1c->assoc     = (D1i >> 16) & 0xff;
    D1c->line_size = (D1i >>  0) & 0xff;
@@ -1044,14 +1352,14 @@
    cache_t D1_dflt = (cache_t) {  65536, 2, 64 };
    cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
 
-#define CMD_LINE_DEFINED(L)                 \
-   (-1 != VG_(clo_##L##_cache).size  ||     \
-    -1 != VG_(clo_##L##_cache).assoc ||     \
-    -1 != VG_(clo_##L##_cache).line_size)
+#define CMD_LINE_DEFINED(L)            \
+   (-1 != clo_##L##_cache.size  ||     \
+    -1 != clo_##L##_cache.assoc ||     \
+    -1 != clo_##L##_cache.line_size)
 
-   *I1c = VG_(clo_I1_cache);
-   *D1c = VG_(clo_D1_cache);
-   *L2c = VG_(clo_L2_cache);
+   *I1c = clo_I1_cache;
+   *D1c = clo_D1_cache;
+   *L2c = clo_L2_cache;
 
    /* If any undefined on command-line, try CPUID */
    if (! CMD_LINE_DEFINED(I1) ||
@@ -1061,9 +1369,9 @@
       /* Overwrite CPUID result for any cache defined on command-line */
       if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
    
-         if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
-         if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
-         if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
+         if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
+         if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
+         if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
 
       /* CPUID failed, use defaults for each undefined by command-line */
       } else {
@@ -1071,9 +1379,9 @@
                       "Couldn't detect cache configuration, using one "
                       "or more defaults ");
 
-         *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
-         *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
-         *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
+         *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
+         *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
+         *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
       }
    }
 #undef CMD_LINE_DEFINED
@@ -1093,65 +1401,8 @@
    }
 }
 
-void VG_(init_cachesim)(void)
-{
-   cache_t I1c, D1c, L2c; 
-
-   /* Make sure the output file can be written. */
-   Int fd = VG_(open_write)(OUT_FILE);
-   if (-1 == fd) { 
-      fd = VG_(create_and_write)(OUT_FILE);
-      if (-1 == fd) {
-         file_err(); 
-      }
-   }
-   VG_(close)(fd);
-
-   initCC(&Ir_total);
-   initCC(&Dr_total);
-   initCC(&Dw_total);
-   
-   initCC(&Ir_discards);
-   initCC(&Dr_discards);
-   initCC(&Dw_discards);
-
-   get_caches(&I1c, &D1c, &L2c);
-
-   cachesim_I1_initcache(I1c);
-   //cachesim_I1_initcache();
-   cachesim_D1_initcache(D1c);
-   //cachesim_D1_initcache();
-   cachesim_L2_initcache(L2c);
-   //cachesim_L2_initcache();
-
-   init_BBCC_table();
-}
-
-void VG_(cachesim_log_non_mem_instr)(iCC* cc)
-{
-   //VG_(printf)("sim  I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
-   //            cc, cc->instr_addr, cc->instr_size)
-   VGP_PUSHCC(VgpCacheSimulate);
-   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
-   cc->I.a++;
-   VGP_POPCC;
-}
-
-void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
-{
-   //VG_(printf)("sim  D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
-   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
-   VGP_PUSHCC(VgpCacheSimulate);
-   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
-   cc->I.a++;
-
-   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
-   cc->D.a++;
-   VGP_POPCC;
-}
-
 /*------------------------------------------------------------*/
-/*--- Printing of output file and summary stats            ---*/
+/*--- SK_(fini)() and related function                     ---*/
 /*------------------------------------------------------------*/
 
 static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl, 
@@ -1181,15 +1432,15 @@
       Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-         case INSTR_CC:
+         case InstrCC:
             instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
             sprint_iCC(buf, (iCC*)BBCC_ptr);
             ADD_CC_TO(iCC, I, Ir_total);
             BBCC_ptr += sizeof(iCC);
             break;
 
-         case READ_CC:
-         case  MOD_CC:
+         case ReadCC:
+         case  ModCC:
             instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
             sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
@@ -1197,7 +1448,7 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
-         case WRITE_CC:
+         case WriteCC:
             instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
             sprint_write_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
@@ -1205,6 +1456,15 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
+         case ReadWriteCC:
+            instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
+            sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
+            ADD_CC_TO(iddCC, I,  Ir_total);
+            ADD_CC_TO(iddCC, Da, Dr_total);
+            ADD_CC_TO(iddCC, Db, Dw_total);
+            BBCC_ptr += sizeof(iddCC);
+            break;
+
          default:
             VG_(panic)("Unknown CC type in fprint_BBCC()\n");
             break;
@@ -1223,7 +1483,7 @@
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
+      if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1251,8 +1511,7 @@
    vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
 }
 
-static void fprint_BBCC_table_and_calc_totals(Int client_argc, 
-                                              Char** client_argv)
+static void fprint_BBCC_table_and_calc_totals(void)
 {
    Int        fd;
    Char       buf[BUF_LEN];
@@ -1261,8 +1520,8 @@
    BBCC      *curr_BBCC;
    Int        i,j,k;
 
-   VGP_PUSHCC(VgpCacheDump);
-   fd = VG_(open_write)(OUT_FILE);
+   VGP_PUSHCC(VgpCacheResults);
+   fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
    if (-1 == fd) { file_err(); }
 
    /* "desc:" lines (giving I1/D1/L2 cache configuration) */
@@ -1276,8 +1535,8 @@
    /* "cmd:" line */
    VG_(strcpy)(buf, "cmd:");
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
-   for (i = 0; i < client_argc; i++) {
-       VG_(sprintf)(buf, " %s", client_argv[i]);
+   for (i = 0; i < VG_(client_argc); i++) {
+       VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
        VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    }
    /* "events:" line */
@@ -1395,6 +1654,7 @@
    VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
    len = VG_(strlen)(buf);
    space = field_width - len;
+   if (space < 0) space = 0;     /* Allow for v. small field_width */
    i = len;
 
    /* Right justify in field */
@@ -1402,7 +1662,7 @@
    for (i = 0; i < space; i++)  buf[i] = ' ';
 }
 
-void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
+void SK_(fini)(void)
 {
    CC D_total;
    ULong L2_total_m, L2_total_mr, L2_total_mw,
@@ -1413,7 +1673,7 @@
    Int l1, l2, l3;
    Int p;
 
-   fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
+   fprint_BBCC_table_and_calc_totals();
 
    if (VG_(clo_verbosity) == 0) 
       return;
@@ -1431,6 +1691,7 @@
 
    p = 100;
 
+   if (0 == Ir_total.a) Ir_total.a = 1;
    percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
    VG_(message)(Vg_UserMsg, "I1  miss rate: %s", buf1);
                 
@@ -1464,6 +1725,9 @@
 
    p = 10;
    
+   if (0 == D_total.a)   D_total.a = 1;
+   if (0 == Dr_total.a) Dr_total.a = 1;
+   if (0 == Dw_total.a) Dw_total.a = 1;
    percentify( D_total.m1 * 100 * p / D_total.a,  p, l1+1, buf1);
    percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
    percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
@@ -1525,7 +1789,7 @@
        VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
        VG_(message)(Vg_DebugMsg, "Distinct instrs:  %d", distinct_instrs);
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpCacheResults);
 }
 
 
@@ -1534,19 +1798,18 @@
  *
  * Finds the BBCC in the table, removes it, adds the counts to the discard
  * counters, and then frees the BBCC. */
-void VG_(cachesim_notify_discard) ( TTEntry* tte )
+void SK_(discard_basic_block_info) ( Addr a, UInt size )
 {
    BBCC *BBCC_node;
    Addr BBCC_ptr0, BBCC_ptr;
    Bool BB_seen_before;
     
    if (0)
-   VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
-                tte->orig_addr, (Int)tte->orig_size);
+      VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
 
    /* 2nd arg won't be used since BB should have been seen before (assertions
     * ensure this). */
-   BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
+   BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    vg_assert(True == BB_seen_before);
@@ -1559,33 +1822,182 @@
 
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-         case INSTR_CC:
+         case InstrCC:
             ADD_CC_TO(iCC, I, Ir_discards);
             BBCC_ptr += sizeof(iCC);
             break;
 
-         case READ_CC:
-         case  MOD_CC:
+         case ReadCC:
+         case  ModCC:
             ADD_CC_TO(idCC, I, Ir_discards);
             ADD_CC_TO(idCC, D, Dr_discards);
             BBCC_ptr += sizeof(idCC);
             break;
 
-         case WRITE_CC:
+         case WriteCC:
             ADD_CC_TO(idCC, I, Ir_discards);
             ADD_CC_TO(idCC, D, Dw_discards);
             BBCC_ptr += sizeof(idCC);
             break;
 
+         case ReadWriteCC:
+            ADD_CC_TO(iddCC, I, Ir_discards);
+            ADD_CC_TO(iddCC, Da, Dr_discards);
+            ADD_CC_TO(iddCC, Db, Dw_discards);
+            BBCC_ptr += sizeof(iddCC);
+            break;
+
          default:
-            VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
+            VG_(panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
             break;
       }
    }
-
-   VG_(free)(VG_AR_PRIVATE, BBCC_node);
+   VG_(free)(BBCC_node);
 }
 
 /*--------------------------------------------------------------------*/
+/*--- Command line processing                                      ---*/
+/*--------------------------------------------------------------------*/
+
+static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int   i1, i2, i3;
+   int   i;
+   char *opt = VG_(strdup)(orig_opt);
+
+   i = i1 = opt_len;
+
+   /* Option looks like "--I1=65536,2,64".
+    * Find commas, replace with NULs to make three independent 
+    * strings, then extract numbers.  Yuck. */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i2 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i3 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if ('\0' != opt[i]) goto bad;
+
+   cache->size      = (Int)VG_(atoll)(opt + i1);
+   cache->assoc     = (Int)VG_(atoll)(opt + i2);
+   cache->line_size = (Int)VG_(atoll)(opt + i3);
+
+   VG_(free)(opt);
+
+   return;
+
+  bad:
+   VG_(bad_option)(orig_opt);
+}
+
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+   /* 5 is length of "--I1=" */
+   if      (0 == VG_(strncmp)(arg, "--I1=", 5))
+      parse_cache_opt(&clo_I1_cache, arg,   5);
+   else if (0 == VG_(strncmp)(arg, "--D1=", 5))
+      parse_cache_opt(&clo_D1_cache, arg,   5);
+   else if (0 == VG_(strncmp)(arg, "--L2=", 5))
+      parse_cache_opt(&clo_L2_cache, arg,   5);
+   else
+      return False;
+
+   return True;
+}
+
+Char* SK_(usage)(void)
+{
+   return 
+"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
+"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
+"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n";
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used) 
+{
+   needs->name                    = "cachegrind";
+   needs->description             = "an I1/D1/L2 cache profiler";
+
+   needs->basic_block_discards    = True;
+   needs->command_line_options    = True;
+
+   VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
+   VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
+}
+
+void SK_(post_clo_init)(void)
+{
+   cache_t I1c, D1c, L2c; 
+   Int fd;
+
+   /* Set output file name: cachegrind.<pid>.out */
+   VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
+
+   /* Make sure the output file can be written. */
+   fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+   if (-1 == fd) { 
+      fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
+                                          VKI_S_IRUSR|VKI_S_IWUSR);
+      if (-1 == fd) {
+         file_err(); 
+      }
+   }
+   VG_(close)(fd);
+
+   initCC(&Ir_total);
+   initCC(&Dr_total);
+   initCC(&Dw_total);
+   
+   initCC(&Ir_discards);
+   initCC(&Dr_discards);
+   initCC(&Dw_discards);
+
+   get_caches(&I1c, &D1c, &L2c);
+
+   cachesim_I1_initcache(I1c);
+   cachesim_D1_initcache(D1c);
+   cachesim_L2_initcache(L2c);
+
+   VGP_(register_profile_event)(VgpGetBBCC,       "get-BBCC");
+   VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
+   VGP_(register_profile_event)(VgpCacheResults,  "cache-results");
+   
+   init_BBCC_table();
+}
+
+#if 0
+Bool SK_(cheap_sanity_check)(void) { return True; }
+
+extern TTEntry* vg_tt;
+
+Bool SK_(expensive_sanity_check)(void)
+{ 
+   Int i;
+   Bool dummy;
+   for (i = 0; i < 200191; i++) {
+      if (vg_tt[i].orig_addr != (Addr)1 &&
+          vg_tt[i].orig_addr != (Addr)3) {
+         VG_(printf)(".");
+         get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
+      }
+   }
+   return True;
+}
+#endif
+
+/*--------------------------------------------------------------------*/
 /*--- end                                            vg_cachesim.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/cachegrind/cg_sim_D1.c b/cachegrind/cg_sim_D1.c
index 7b8a8da..19d11ba 100644
--- a/cachegrind/cg_sim_D1.c
+++ b/cachegrind/cg_sim_D1.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/cachegrind/cg_sim_I1.c b/cachegrind/cg_sim_I1.c
index 26db3b3..8993ecb 100644
--- a/cachegrind/cg_sim_I1.c
+++ b/cachegrind/cg_sim_I1.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/cachegrind/cg_sim_L2.c b/cachegrind/cg_sim_L2.c
index ec89027..e870db2 100644
--- a/cachegrind/cg_sim_L2.c
+++ b/cachegrind/cg_sim_L2.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/cachegrind/cg_sim_gen.c b/cachegrind/cg_sim_gen.c
index 182a031..89d3337 100644
--- a/cachegrind/cg_sim_gen.c
+++ b/cachegrind/cg_sim_gen.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* Notes:
@@ -78,8 +78,7 @@
                                  c->size, c->line_size, c->assoc);
    }
 
-   c->tags = VG_(malloc)(VG_AR_PRIVATE, 
-                         sizeof(UInt) * c->sets * c->assoc);
+   c->tags = VG_(malloc)(sizeof(UInt) * c->sets * c->assoc);
 
    for (i = 0; i < c->sets * c->assoc; i++)
       c->tags[i] = 0;
@@ -100,9 +99,9 @@
 }
 #endif 
 
-/* XXX: This is done as a macro rather than by passing in the cache_t2 as
- * an arg because it slows things down by a small amount (3-5%) due to all that
- * extra indirection. */
+/* This is done as a macro rather than by passing in the cache_t2 as an 
+ * arg because it slows things down by a small amount (3-5%) due to all 
+ * that extra indirection. */
 
 #define CACHESIM(L, MISS_TREATMENT)                                         \
 /* The cache and associated bits and pieces. */                             \
diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html
index b715ee3..95fe840 100644
--- a/cachegrind/docs/manual.html
+++ b/cachegrind/docs/manual.html
@@ -345,7 +345,7 @@
 </pre>
 
 <p>Note that Valgrind also reads options from the environment variable
-<code>$VALGRIND</code>, and processes them before the command-line
+<code>$VALGRIND_OPTS</code>, and processes them before the command-line
 options.
 
 <p>Valgrind's default settings succeed in giving reasonable behaviour
@@ -838,8 +838,8 @@
   <li>The contents of malloc'd blocks, before you write something
       there.  In C++, the new operator is a wrapper round malloc, so
       if you create an object with new, its fields will be
-      uninitialised until you fill them in, which is only Right and
-      Proper.</li>
+      uninitialised until you (or the constructor) fill them in, which
+      is only Right and Proper.</li>
 </ul>
 
 
@@ -1066,16 +1066,16 @@
       <p>
 
   <li>The "immediate location" specification.  For Value and Addr
-      errors, is either the name of the function in which the error
-      occurred, or, failing that, the full path the the .so file
-      containing the error location.  For Param errors, is the name of
-      the offending system call parameter.  For Free errors, is the
-      name of the function doing the freeing (eg, <code>free</code>,
-      <code>__builtin_vec_delete</code>, etc)</li><br>
+      errors, it is either the name of the function in which the error
+      occurred, or, failing that, the full path of the .so file or
+      executable containing the error location.  For Param errors,
+      is the name of the offending system call parameter.  For Free
+      errors, is the name of the function doing the freeing (eg,
+      <code>free</code>, <code>__builtin_vec_delete</code>, etc)</li><br>
       <p>
 
   <li>The caller of the above "immediate location".  Again, either a
-      function or shared-object name.</li><br>
+      function or shared-object/executable name.</li><br>
       <p>
 
   <li>Optionally, one or two extra calling-function or object names,
@@ -1083,8 +1083,8 @@
 </ul>
 
 <p>
-Locations may be either names of shared objects or wildcards matching
-function names.  They begin <code>obj:</code> and <code>fun:</code>
+Locations may be either names of shared objects/executables or wildcards
+matching function names.  They begin <code>obj:</code> and <code>fun:</code>
 respectively.  Function and object names to match against may use the 
 wildcard characters <code>*</code> and <code>?</code>.
 
@@ -1617,11 +1617,11 @@
 
   <li>If the new size is smaller, the dropped-off section is marked as
       unaddressible.  You may only pass to realloc a pointer
-      previously issued to you by malloc/calloc/new/realloc.</li><br>
+      previously issued to you by malloc/calloc/realloc.</li><br>
       <p>
 
   <li>free/delete: you may only pass to free a pointer previously
-      issued to you by malloc/calloc/new/realloc, or the value
+      issued to you by malloc/calloc/realloc, or the value
       NULL. Otherwise, Valgrind complains.  If the pointer is indeed
       valid, Valgrind marks the entire area it points at as
       unaddressible, and places the block in the freed-blocks-queue.
@@ -2058,7 +2058,9 @@
   <li>Run your program with <code>cachegrind</code> in front of the
       normal command line invocation.  When the program finishes,
       Valgrind will print summary cache statistics. It also collects
-      line-by-line information in a file <code>cachegrind.out</code>.
+      line-by-line information in a file
+      <code>cachegrind.out.<i>pid</i></code>, where <code><i>pid</i></code>
+      is the program's process id.
       <p>
       This step should be done every time you want to collect
       information about a new program, a changed program, or about the
@@ -2197,15 +2199,17 @@
 
 As well as printing summary information, Cachegrind also writes
 line-by-line cache profiling information to a file named
-<code>cachegrind.out</code>.  This file is human-readable, but is best
-interpreted by the accompanying program <code>vg_annotate</code>,
+<code>cachegrind.out.<i>pid</i></code>.  This file is human-readable, but is
+best interpreted by the accompanying program <code>vg_annotate</code>,
 described in the next section.
 <p>
-Things to note about the <code>cachegrind.out</code> file:
+Things to note about the <code>cachegrind.out.<i>pid</i></code> file:
 <ul>
   <li>It is written every time <code>valgrind --cachesim=yes</code> or
       <code>cachegrind</code> is run, and will overwrite any existing
-      <code>cachegrind.out</code> in the current directory.</li>
+      <code>cachegrind.out.<i>pid</i></code> in the current directory (but
+      that won't happen very often because it takes some time for process ids
+      to be recycled).</li>
   <p>
   <li>It can be huge: <code>ls -l</code> generates a file of about
       350KB.  Browsing a few files and web pages with a Konqueror
@@ -2213,6 +2217,13 @@
       of around 15 MB.</li>
 </ul>
 
+Note that older versions of Cachegrind used a log file named
+<code>cachegrind.out</code> (i.e. no <code><i>.pid</i></code> suffix).
+The suffix serves two purposes.  Firstly, it means you don't have to rename old
+log files that you don't want to overwrite.  Secondly, and more importantly,
+it allows correct profiling with the <code>--trace-children=yes</code> option
+of programs that spawn child processes.
+
 <a name="profileflags"></a>
 <h3>7.5&nbsp; Cachegrind options</h3>
 Cachegrind accepts all the options that Valgrind does, although some of them
@@ -2245,9 +2256,13 @@
 window to be at least 120-characters wide if possible, as the output
 lines can be quite long.
 <p>
-To get a function-by-function summary, run <code>vg_annotate</code> in
-directory containing a <code>cachegrind.out</code> file.  The output
-looks like this:
+To get a function-by-function summary, run <code>vg_annotate
+--<i>pid</i></code> in a directory containing a
+<code>cachegrind.out.<i>pid</i></code> file.  The <code>--<i>pid</i></code>
+is required so that <code>vg_annotate</code> knows which log file to use when
+several are present.
+<p>
+The output looks like this:
 
 <pre>
 --------------------------------------------------------------------------------
@@ -2468,8 +2483,9 @@
 specific enough.
 
 Beware that vg_annotate can take some time to digest large
-<code>cachegrind.out</code> files, eg. 30 seconds or more.  Also beware that
-auto-annotation can produce a lot of output if your program is large!
+<code>cachegrind.out.<i>pid</i></code> files, e.g. 30 seconds or more.  Also
+beware that auto-annotation can produce a lot of output if your program is
+large!
 
 
 <h3>7.7&nbsp; Annotating assembler programs</h3>
@@ -2492,13 +2508,18 @@
 
 <h3>7.8&nbsp; <code>vg_annotate</code> options</h3>
 <ul>
+  <li><code>--<i>pid</i></code></li><p>
+
+      Indicates which <code>cachegrind.out.<i>pid</i></code> file to read.
+      Not actually an option -- it is required.
+    
   <li><code>-h, --help</code></li><p>
   <li><code>-v, --version</code><p>
 
       Help and version, as usual.</li>
 
   <li><code>--sort=A,B,C</code> [default: order in 
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies the events upon which the sorting of the function-by-function
       entries will be based.  Useful if you want to concentrate on eg. I cache
       misses (<code>--sort=I1mr,I2mr</code>), or D cache misses
@@ -2506,10 +2527,10 @@
       (<code>--sort=D2mr,I2mr</code>).</li><p>
 
   <li><code>--show=A,B,C</code> [default: all, using order in
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies which events to show (and the column order). Default is to use
-      all present in the <code>cachegrind.out</code> file (and use the order in
-      the file).</li><p>
+      all present in the <code>cachegrind.out.<i>pid</i></code> file (and use
+      the order in the file).</li><p>
 
   <li><code>--threshold=X</code> [default: 99%] <p>
       Sets the threshold for the function-by-function summary.  Functions are
@@ -2547,17 +2568,18 @@
 There are a couple of situations in which vg_annotate issues warnings.
 
 <ul>
-  <li>If a source file is more recent than the <code>cachegrind.out</code>
-      file.  This is because the information in <code>cachegrind.out</code> is
-      only recorded with line numbers, so if the line numbers change at all in
-      the source (eg. lines added, deleted, swapped), any annotations will be 
+  <li>If a source file is more recent than the
+      <code>cachegrind.out.<i>pid</i></code> file.  This is because the
+      information in <code>cachegrind.out.<i>pid</i></code> is only recorded
+      with line numbers, so if the line numbers change at all in the source
+      (eg.  lines added, deleted, swapped), any annotations will be
       incorrect.<p>
 
   <li>If information is recorded about line numbers past the end of a file.
       This can be caused by the above problem, ie. shortening the source file
-      while using an old <code>cachegrind.out</code> file.  If this happens,
-      the figures for the bogus lines are printed anyway (clearly marked as
-      bogus) in case they are important.</li><p>
+      while using an old <code>cachegrind.out.<i>pid</i></code> file.  If this
+      happens, the figures for the bogus lines are printed anyway (clearly
+      marked as bogus) in case they are important.</li><p>
 </ul>
 
 
@@ -2677,6 +2699,13 @@
       <blockquote><code>btsl %eax, %edx</code></blockquote>
 
       This should only happen rarely.
+      </li><p>
+
+  <li>FPU instructions with data sizes of 28 and 108 bytes (e.g.
+      <code>fsave</code>) are treated as though they only access 16 bytes.
+      These instructions seem to be rare so hopefully this won't affect
+      accuracy much.
+      </li><p>
 </ul>
 
 Another thing worth nothing is that results are very sensitive.  Changing the
diff --git a/cachegrind/tests/Makefile.am b/cachegrind/tests/Makefile.am
new file mode 100644
index 0000000..f4e0f44
--- /dev/null
+++ b/cachegrind/tests/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## Cachegrind ones.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	dlclose fpu-28-108 myprint.so
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS) 
+
+# C ones
+dlclose_SOURCES		= dlclose.c
+dlclose_LDADD		= -ldl
+myprint_so_SOURCES	= myprint.c
+myprint_so_LDFLAGS	= -shared
+
+fpu_28_108_SOURCES	= fpu-28-108.S
+
+##myprint.so$(EXEEXT): $(myprint_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o myprint.so $(myprint_so_OBJECTS)
+
+
+
diff --git a/cachegrind/tests/dlclose.c b/cachegrind/tests/dlclose.c
new file mode 100644
index 0000000..9fee030
--- /dev/null
+++ b/cachegrind/tests/dlclose.c
@@ -0,0 +1,38 @@
+/* This exercises the code that was causing this bug:
+  
+     valgrind: vg_cachesim.c:389 (get_BBCC): Assertion `((Bool)0) == remove' 
+     failed.
+
+   in Cachegrind 1.0.0 and 1.0.1, that was caused by unloading symbols before
+   invalidating translations.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+int main(int argc, char **argv) {
+   void *handle;
+   void (*myprint)(void);
+   char *error;
+
+   handle = dlopen ("./myprint.so", RTLD_LAZY);
+   if (!handle) {
+       fputs (dlerror(), stderr);
+       exit(1);
+   }
+
+   myprint = dlsym(handle, "myprint");
+   if ((error = dlerror()) != NULL)  {
+       fprintf (stderr, "%s\n", error);
+       exit(1);
+   }
+
+   (*myprint)();
+
+   /* Assertion failure was happening here */
+   dlclose(handle);
+
+   return 0;
+}
+
diff --git a/cachegrind/tests/dlclose.stderr.exp b/cachegrind/tests/dlclose.stderr.exp
new file mode 100644
index 0000000..89483cd
--- /dev/null
+++ b/cachegrind/tests/dlclose.stderr.exp
@@ -0,0 +1,19 @@
+
+discard ... (... -> ...) translations in range 0x........ .. 0x........
+discard syms in /.../tests/cachesim/myprint.so due to munmap()
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/dlclose.stderr.exp.hd b/cachegrind/tests/dlclose.stderr.exp.hd
new file mode 100644
index 0000000..89483cd
--- /dev/null
+++ b/cachegrind/tests/dlclose.stderr.exp.hd
@@ -0,0 +1,19 @@
+
+discard ... (... -> ...) translations in range 0x........ .. 0x........
+discard syms in /.../tests/cachesim/myprint.so due to munmap()
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/dlclose.stdout.exp b/cachegrind/tests/dlclose.stdout.exp
new file mode 100644
index 0000000..890082f
--- /dev/null
+++ b/cachegrind/tests/dlclose.stdout.exp
@@ -0,0 +1 @@
+This is myprint!
diff --git a/cachegrind/tests/dlclose.vgtest b/cachegrind/tests/dlclose.vgtest
new file mode 100644
index 0000000..e014f34
--- /dev/null
+++ b/cachegrind/tests/dlclose.vgtest
@@ -0,0 +1,3 @@
+vgopts.hd: --cachesim=yes
+prog: dlclose
+stderr_filter: filter_cachesim_discards
diff --git a/cachegrind/tests/filter_cachesim_discards b/cachegrind/tests/filter_cachesim_discards
new file mode 100755
index 0000000..a4f6732
--- /dev/null
+++ b/cachegrind/tests/filter_cachesim_discards
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/filter_stderr | $dir/../filter_discards
diff --git a/cachegrind/tests/filter_stderr b/cachegrind/tests/filter_stderr
new file mode 100755
index 0000000..c33214c
--- /dev/null
+++ b/cachegrind/tests/filter_stderr
@@ -0,0 +1,12 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic                         |
+
+# Remove numbers from I/D/L2 "refs:" lines
+sed "s/\(\(I\|D\|L2\) *refs:\)[ 0-9,()+rdw]*$/\1/"  |
+
+# Remove numbers from I1/D1/L2/L2i/L2d "misses:" and "miss rates:" lines
+sed "s/\(\(I1\|D1\|L2\|L2i\|L2d\) *\(misses\|miss rate\):\)[ 0-9,()+rdw%\.]*$/\1/" 
+
diff --git a/cachegrind/tests/fpu-28-108.S b/cachegrind/tests/fpu-28-108.S
new file mode 100644
index 0000000..f655c00
--- /dev/null
+++ b/cachegrind/tests/fpu-28-108.S
@@ -0,0 +1,24 @@
+/* Test 28 and 108 byte loads and stores.  (Just make sure program
+   runs without any assertion failures from V.) */
+
+/* Useful listing: 
+	gcc -o tests/fpu_28_108 tests/fpu_28_108.S -Wa,-a */
+
+.data
+fooble:
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+bar:
+        
+.text
+.globl main
+main:
+        fstsw   fooble
+        fsave   fooble
+        frstor  fooble
+        fstenv  fooble
+        fldenv  fooble
+        movl    $0, %eax
+        ret
+
diff --git a/cachegrind/tests/fpu-28-108.stderr.exp b/cachegrind/tests/fpu-28-108.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/fpu-28-108.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/fpu-28-108.stderr.exp.hd b/cachegrind/tests/fpu-28-108.stderr.exp.hd
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/cachegrind/tests/fpu-28-108.stderr.exp.hd
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/cachegrind/tests/fpu-28-108.vgtest b/cachegrind/tests/fpu-28-108.vgtest
new file mode 100644
index 0000000..42d57a3
--- /dev/null
+++ b/cachegrind/tests/fpu-28-108.vgtest
@@ -0,0 +1,2 @@
+vgopts.hd: --cachesim=yes
+prog: fpu-28-108
diff --git a/cachegrind/tests/myprint.c b/cachegrind/tests/myprint.c
new file mode 100644
index 0000000..e22ae87
--- /dev/null
+++ b/cachegrind/tests/myprint.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+void myprint(void)
+{
+   puts("This is myprint!");
+}
diff --git a/configure.in b/configure.in
index 4c28a3a..731b29b 100644
--- a/configure.in
+++ b/configure.in
@@ -1,7 +1,7 @@
 # Process this file with autoconf to produce a configure script.
 AC_INIT(vg_clientmalloc.c)
 AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(valgrind, 1.0.0)
+AM_INIT_AUTOMAKE(valgrind, post-1.0.0-ERASER)
 
 AM_MAINTAINER_MODE
 
@@ -11,6 +11,7 @@
 AC_PROG_LN_S
 AC_PROG_CC
 AC_PROG_CPP
+AC_PROG_CXX
 AC_PROG_RANLIB
 
 # Check for the compiler support
@@ -293,10 +294,13 @@
    vg_annotate
    valgrind
    valgrind.spec
-   cachegrind
    Makefile 
    docs/Makefile 
    tests/Makefile 
+   tests/cachesim/Makefile 
+   tests/corecheck/Makefile 
+   tests/none/Makefile 
+   tests/memcheck/Makefile 
    demangle/Makefile)
 
 cat<<EOF
diff --git a/corecheck/Makefile.am b/corecheck/Makefile.am
index 60553dd..96911ed 100644
--- a/corecheck/Makefile.am
+++ b/corecheck/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/corecheck/cc_main.c b/corecheck/cc_main.c
new file mode 100644
index 0000000..58568d4
--- /dev/null
+++ b/corecheck/cc_main.c
@@ -0,0 +1,59 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Skin reporting errors detected in core.            cc_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track) 
+{
+   needs->name                    = "coregrind";
+   needs->description             = "a rudimentary error detector";
+
+   needs->core_errors             = True;
+
+   /* No core events to track */
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr a)
+{
+    return cb;
+}
+
+void SK_(fini)(void)
+{
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                cc_main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/corecheck/tests/Makefile.am b/corecheck/tests/Makefile.am
new file mode 100644
index 0000000..26e906e
--- /dev/null
+++ b/corecheck/tests/Makefile.am
@@ -0,0 +1,36 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## These test core error checking, eg. "silly values" for malloc/calloc,
+## pthread errors (and suppressions), signal handling errors, invalid fds for
+## blocking syscalls, etc.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	erringfds malloc3 sigkill \
+	pth_atfork1 pth_cancel2 pth_cvsimple pth_empty \
+	pth_mutexspeed pth_once
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# C ones
+erringfds_SOURCES 	= erringfds.c
+malloc3_SOURCES 	= malloc3.c
+sigkill_SOURCES 	= sigkill.c
+
+# Pthread ones
+pth_atfork1_SOURCES	= pth_atfork1.c
+pth_atfork1_LDADD	= -lpthread
+pth_cancel2_SOURCES	= pth_cancel2.c
+pth_cancel2_LDADD	= -lpthread
+pth_cvsimple_SOURCES	= pth_cvsimple.c
+pth_cvsimple_LDADD	= -lpthread
+pth_empty_SOURCES 	= pth_empty.c
+pth_empty_LDADD 	= -lpthread
+pth_mutexspeed_SOURCES	= pth_mutexspeed.c
+pth_mutexspeed_LDADD	= -lpthread
+pth_once_SOURCES	= pth_once.c
+pth_once_LDADD		= -lpthread
+
+
diff --git a/tests/erringfds.c b/corecheck/tests/erringfds.c
similarity index 100%
copy from tests/erringfds.c
copy to corecheck/tests/erringfds.c
diff --git a/corecheck/tests/erringfds.stderr.exp b/corecheck/tests/erringfds.stderr.exp
new file mode 100644
index 0000000..b6a487d
--- /dev/null
+++ b/corecheck/tests/erringfds.stderr.exp
@@ -0,0 +1,4 @@
+
+Warning: invalid file descriptor -1 in syscall read()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/corecheck/tests/erringfds.stderr.exp.hd b/corecheck/tests/erringfds.stderr.exp.hd
new file mode 100644
index 0000000..0516e09
--- /dev/null
+++ b/corecheck/tests/erringfds.stderr.exp.hd
@@ -0,0 +1,8 @@
+
+Warning: invalid file descriptor -1 in syscall read()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/erringfds.stdout.exp b/corecheck/tests/erringfds.stdout.exp
new file mode 100644
index 0000000..bcc1770
--- /dev/null
+++ b/corecheck/tests/erringfds.stdout.exp
@@ -0,0 +1,2 @@
+fd = -1
+n = -1
diff --git a/corecheck/tests/erringfds.vgtest b/corecheck/tests/erringfds.vgtest
new file mode 100644
index 0000000..5a8ede4
--- /dev/null
+++ b/corecheck/tests/erringfds.vgtest
@@ -0,0 +1 @@
+prog: erringfds
diff --git a/corecheck/tests/filter_stderr b/corecheck/tests/filter_stderr
new file mode 100755
index 0000000..31c5258
--- /dev/null
+++ b/corecheck/tests/filter_stderr
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic
diff --git a/tests/malloc3.c b/corecheck/tests/malloc3.c
similarity index 100%
copy from tests/malloc3.c
copy to corecheck/tests/malloc3.c
diff --git a/corecheck/tests/malloc3.stderr.exp b/corecheck/tests/malloc3.stderr.exp
new file mode 100644
index 0000000..97c1780
--- /dev/null
+++ b/corecheck/tests/malloc3.stderr.exp
@@ -0,0 +1,6 @@
+
+Warning: silly arg (-1) to malloc()
+Warning: silly args (0,-1) to calloc()
+Warning: silly args (-1,-1) to calloc()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/corecheck/tests/malloc3.stderr.exp.hd b/corecheck/tests/malloc3.stderr.exp.hd
new file mode 100644
index 0000000..9a908f3
--- /dev/null
+++ b/corecheck/tests/malloc3.stderr.exp.hd
@@ -0,0 +1,10 @@
+
+Warning: silly arg (-1) to malloc()
+Warning: silly args (0,-1) to calloc()
+Warning: silly args (-1,-1) to calloc()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 2 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/malloc3.stdout.exp b/corecheck/tests/malloc3.stdout.exp
new file mode 100644
index 0000000..681c9ec
--- /dev/null
+++ b/corecheck/tests/malloc3.stdout.exp
@@ -0,0 +1,5 @@
+malloc(0) = 0x........
+malloc(-1) = (nil)
+calloc(0,1) = 0x........
+calloc(0,-1) = (nil)
+calloc(-1,-1) = (nil)
diff --git a/corecheck/tests/malloc3.vgtest b/corecheck/tests/malloc3.vgtest
new file mode 100644
index 0000000..9feb8f0
--- /dev/null
+++ b/corecheck/tests/malloc3.vgtest
@@ -0,0 +1,2 @@
+prog: malloc3
+stdout_filter: ../filter_addresses
diff --git a/tests/pth_atfork1.c b/corecheck/tests/pth_atfork1.c
similarity index 100%
copy from tests/pth_atfork1.c
copy to corecheck/tests/pth_atfork1.c
diff --git a/corecheck/tests/pth_atfork1.stderr.exp b/corecheck/tests/pth_atfork1.stderr.exp
new file mode 100644
index 0000000..49f4fb4
--- /dev/null
+++ b/corecheck/tests/pth_atfork1.stderr.exp
@@ -0,0 +1,5 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_atfork1.stderr.exp.hd b/corecheck/tests/pth_atfork1.stderr.exp.hd
new file mode 100644
index 0000000..77357fb
--- /dev/null
+++ b/corecheck/tests/pth_atfork1.stderr.exp.hd
@@ -0,0 +1,13 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_atfork1.stdout.exp b/corecheck/tests/pth_atfork1.stdout.exp
new file mode 100644
index 0000000..089bcff
--- /dev/null
+++ b/corecheck/tests/pth_atfork1.stdout.exp
@@ -0,0 +1,4 @@
+prepare
+child
+prepare
+parent
diff --git a/corecheck/tests/pth_atfork1.vgtest b/corecheck/tests/pth_atfork1.vgtest
new file mode 100644
index 0000000..237ff87
--- /dev/null
+++ b/corecheck/tests/pth_atfork1.vgtest
@@ -0,0 +1 @@
+prog: pth_atfork1
diff --git a/corecheck/tests/pth_cancel2.c b/corecheck/tests/pth_cancel2.c
new file mode 100644
index 0000000..5bd7d0b
--- /dev/null
+++ b/corecheck/tests/pth_cancel2.c
@@ -0,0 +1,101 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad Nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ * async_safe --
+ *
+ * Example showing macro wrappers for calling non-async
+ * safe routines when the caller has asynchronous 
+ * cancellation turned on
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <pthread.h>
+
+
+#define async_cancel_safe_read(fd,buf,amt) \
+   { \
+      int oldtype; \
+      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
+      if (read(fd,buf,amt) < 0) \
+         perror("read"),exit(1); \
+      pthread_setcanceltype(oldtype,NULL); \
+      pthread_testcancel(); \
+   } 
+   
+
+#define async_cancel_safe_write(fd,buf,amt) \
+   { \
+      int oldtype; \
+      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
+      if (write(fd,buf,amt) < 0) \
+         perror("write"), exit(1); \
+      pthread_setcanceltype(oldtype,NULL); \
+      pthread_testcancel(); \
+   }
+
+
+static int fd;
+   
+void *io(void *arg)
+{
+   int *fd2=(int *)arg; 
+   char buf[20]="String";
+   int amt=20;
+
+   for (;;) {
+      async_cancel_safe_write(*fd2,buf,amt);
+      async_cancel_safe_read(*fd2,buf,amt);
+   }
+   return(NULL);
+}
+
+void *killer(void *arg)
+{ 
+   pthread_t * target = (pthread_t *)arg;
+   sleep(1);
+   pthread_cancel(*target);
+   return(NULL);
+}
+
+extern int
+main(void)
+{
+   pthread_t io_thread, killer_thread;   
+
+   extern void *io(void *);
+   extern void *killer(void  *);
+
+   if ((fd = open(".ktemp",O_CREAT | O_RDWR, 0666)) < 0)
+      perror("open"), exit(1);
+
+   pthread_create(&io_thread, 
+		  NULL,
+		  io,
+		  (void *)&fd);
+   pthread_create(&killer_thread,
+		  NULL,
+		  killer,
+		  (void *)&io_thread);
+
+   pthread_join(io_thread, NULL);
+
+   pthread_join(killer_thread,NULL);
+
+   if ((close(fd)) < 0)
+     perror("close"),exit(1);
+   if ((unlink(".ktemp")) < 0)
+     perror("unlink"),exit(1);
+
+   return 0;
+}
diff --git a/corecheck/tests/pth_cancel2.stderr.exp b/corecheck/tests/pth_cancel2.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/corecheck/tests/pth_cancel2.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_cancel2.stderr.exp.hd b/corecheck/tests/pth_cancel2.stderr.exp.hd
new file mode 100644
index 0000000..7dd5fed
--- /dev/null
+++ b/corecheck/tests/pth_cancel2.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 2 allocs, 2 frees, 24 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_cancel2.vgtest b/corecheck/tests/pth_cancel2.vgtest
new file mode 100644
index 0000000..7bb8e1b
--- /dev/null
+++ b/corecheck/tests/pth_cancel2.vgtest
@@ -0,0 +1 @@
+prog: pth_cancel2
diff --git a/corecheck/tests/pth_cvsimple.c b/corecheck/tests/pth_cvsimple.c
new file mode 100644
index 0000000..3bb5085
--- /dev/null
+++ b/corecheck/tests/pth_cvsimple.c
@@ -0,0 +1,83 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad Nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ *
+ * cvsimple.c
+ *
+ * Demonstrates pthread condition variables.
+ *
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+
+#define NUM_THREADS  3
+#define TCOUNT 10
+#define COUNT_THRES 12
+
+int     count = 0;
+int     thread_ids[3] = {0,1,2};
+pthread_mutex_t count_lock=PTHREAD_MUTEX_INITIALIZER; 
+pthread_cond_t count_hit_threshold=PTHREAD_COND_INITIALIZER; 
+
+void *inc_count(void *idp)
+{
+  int i=0;
+  int *my_id = idp;
+
+  for (i=0; i<TCOUNT; i++) {
+    pthread_mutex_lock(&count_lock);
+    count++;
+    printf("inc_counter(): thread %d, count = %d, unlocking mutex\n", 
+	   *my_id, count);
+    if (count == COUNT_THRES) {
+      printf("inc_count(): Thread %d, count %d\n", *my_id, count);
+      pthread_cond_signal(&count_hit_threshold);
+    }
+    pthread_mutex_unlock(&count_lock);
+  }
+  
+  return(NULL);
+}
+
+void *watch_count(void *idp)
+{
+  int *my_id = idp;
+
+  printf("watch_count(): thread %d\n", *my_id);
+  fflush(stdout);
+  pthread_mutex_lock(&count_lock);
+
+  while (count < COUNT_THRES) {
+    pthread_cond_wait(&count_hit_threshold, &count_lock);
+    printf("watch_count(): thread %d, count %d\n", *my_id, count);
+  }
+
+  pthread_mutex_unlock(&count_lock);
+  
+  return(NULL);
+}
+
+extern int
+main(void)
+{
+  int       i;
+  pthread_t threads[3];
+
+  pthread_create(&threads[0], NULL, inc_count, (void *)&thread_ids[0]);
+  pthread_create(&threads[1], NULL, inc_count, (void *)&thread_ids[1]);
+  pthread_create(&threads[2], NULL, watch_count, (void *)&thread_ids[2]);
+
+  for (i = 0; i < NUM_THREADS; i++) {
+    pthread_join(threads[i], NULL);
+  }
+
+  return 0;
+}
+
+
diff --git a/corecheck/tests/pth_cvsimple.stderr.exp b/corecheck/tests/pth_cvsimple.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/corecheck/tests/pth_cvsimple.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_cvsimple.stderr.exp.hd b/corecheck/tests/pth_cvsimple.stderr.exp.hd
new file mode 100644
index 0000000..17aa2dd
--- /dev/null
+++ b/corecheck/tests/pth_cvsimple.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 3 allocs, 3 frees, 36 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_cvsimple.stdout.exp b/corecheck/tests/pth_cvsimple.stdout.exp
new file mode 100644
index 0000000..92dab19
--- /dev/null
+++ b/corecheck/tests/pth_cvsimple.stdout.exp
@@ -0,0 +1,22 @@
+inc_counter(): thread 0, count = 1, unlocking mutex
+inc_counter(): thread 0, count = 2, unlocking mutex
+inc_counter(): thread 0, count = 3, unlocking mutex
+inc_counter(): thread 0, count = 4, unlocking mutex
+inc_counter(): thread 0, count = 5, unlocking mutex
+inc_counter(): thread 0, count = 6, unlocking mutex
+inc_counter(): thread 0, count = 7, unlocking mutex
+inc_counter(): thread 0, count = 8, unlocking mutex
+inc_counter(): thread 0, count = 9, unlocking mutex
+inc_counter(): thread 0, count = 10, unlocking mutex
+inc_counter(): thread 1, count = 11, unlocking mutex
+inc_counter(): thread 1, count = 12, unlocking mutex
+inc_count(): Thread 1, count 12
+inc_counter(): thread 1, count = 13, unlocking mutex
+inc_counter(): thread 1, count = 14, unlocking mutex
+inc_counter(): thread 1, count = 15, unlocking mutex
+inc_counter(): thread 1, count = 16, unlocking mutex
+inc_counter(): thread 1, count = 17, unlocking mutex
+inc_counter(): thread 1, count = 18, unlocking mutex
+inc_counter(): thread 1, count = 19, unlocking mutex
+inc_counter(): thread 1, count = 20, unlocking mutex
+watch_count(): thread 2
diff --git a/corecheck/tests/pth_cvsimple.vgtest b/corecheck/tests/pth_cvsimple.vgtest
new file mode 100644
index 0000000..df57004
--- /dev/null
+++ b/corecheck/tests/pth_cvsimple.vgtest
@@ -0,0 +1 @@
+prog: pth_cvsimple
diff --git a/corecheck/tests/pth_empty.c b/corecheck/tests/pth_empty.c
new file mode 100644
index 0000000..c936a4c
--- /dev/null
+++ b/corecheck/tests/pth_empty.c
@@ -0,0 +1,7 @@
+// Does nothing, but linking it with -lpthread is enough to trigger an error
+// that should be suppressed when it is run.
+
+int main(void)
+{
+   return 0;
+}
diff --git a/corecheck/tests/pth_empty.stderr.exp b/corecheck/tests/pth_empty.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/corecheck/tests/pth_empty.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_empty.stderr.exp.hd b/corecheck/tests/pth_empty.stderr.exp.hd
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/corecheck/tests/pth_empty.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_empty.vgtest b/corecheck/tests/pth_empty.vgtest
new file mode 100644
index 0000000..b56f5fe
--- /dev/null
+++ b/corecheck/tests/pth_empty.vgtest
@@ -0,0 +1 @@
+prog: pth_empty
diff --git a/tests/pth_mutexspeed.c b/corecheck/tests/pth_mutexspeed.c
similarity index 100%
copy from tests/pth_mutexspeed.c
copy to corecheck/tests/pth_mutexspeed.c
diff --git a/corecheck/tests/pth_mutexspeed.stderr.exp b/corecheck/tests/pth_mutexspeed.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/corecheck/tests/pth_mutexspeed.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_mutexspeed.stderr.exp.hd b/corecheck/tests/pth_mutexspeed.stderr.exp.hd
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/corecheck/tests/pth_mutexspeed.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_mutexspeed.stdout.exp b/corecheck/tests/pth_mutexspeed.stdout.exp
new file mode 100644
index 0000000..8208168
--- /dev/null
+++ b/corecheck/tests/pth_mutexspeed.stdout.exp
@@ -0,0 +1,2 @@
+begin 100000 lock--unlocks
+done  100000 lock--unlocks
diff --git a/corecheck/tests/pth_mutexspeed.vgtest b/corecheck/tests/pth_mutexspeed.vgtest
new file mode 100644
index 0000000..3daee3a
--- /dev/null
+++ b/corecheck/tests/pth_mutexspeed.vgtest
@@ -0,0 +1 @@
+prog: pth_mutexspeed
diff --git a/corecheck/tests/pth_once.c b/corecheck/tests/pth_once.c
new file mode 100644
index 0000000..75f6a1f
--- /dev/null
+++ b/corecheck/tests/pth_once.c
@@ -0,0 +1,82 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad Nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ * once_exam.c
+ *
+ * An example of using the pthreads_once() call to execute an
+ * initialization procedure.
+ *
+ * A program spawns multiple threads and each one tries to
+ * execute the routine welcome() using the once call. Only
+ * the first thread into the once routine will actually
+ * execute welcome().
+ *
+ * The program's main thread synchronizes its exit with the
+ * exit of the threads using the pthread_join() operation.
+ *
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <pthread.h>
+
+#define  NUM_THREADS   10
+
+static pthread_once_t welcome_once_block = PTHREAD_ONCE_INIT;
+
+void welcome(void)
+{
+	printf("welcome: Welcome\n");
+}
+
+void *identify_yourself(void *arg)
+{
+        int *pid=(int *)arg;
+	int rtn;
+
+	if ((rtn = pthread_once(&welcome_once_block,
+			        welcome)) != 0) {
+		fprintf(stderr, "pthread_once failed with %d",rtn);
+		pthread_exit((void *)NULL);
+	}
+	printf("identify_yourself: Hi, I'm thread # %d\n",*pid);
+        return(NULL);
+}
+
+extern int
+main(void)
+{
+	int             *id_arg, thread_num, rtn;
+	pthread_t       threads[NUM_THREADS];
+
+	id_arg = (int *)malloc(NUM_THREADS*sizeof(int));
+
+	for (thread_num = 0; thread_num < NUM_THREADS; (thread_num)++) {
+
+		id_arg[thread_num] = thread_num;
+
+		if (( rtn = pthread_create(&threads[thread_num], 
+					   NULL,
+					   identify_yourself,
+					   (void *) &(id_arg[thread_num]))) 
+		    != 0) {
+		  fprintf(stderr, "pthread_create failed with %d",rtn);
+		  exit(1);
+		}
+	} 	
+
+	for (thread_num = 0; thread_num < NUM_THREADS; thread_num++) {
+	  pthread_join(threads[thread_num], NULL);
+	  printf("main: joined to thread %d\n", thread_num);
+	}
+	printf("main: Goodbye\n");
+        return 0;
+}
diff --git a/corecheck/tests/pth_once.stderr.exp b/corecheck/tests/pth_once.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/corecheck/tests/pth_once.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/corecheck/tests/pth_once.stderr.exp.hd b/corecheck/tests/pth_once.stderr.exp.hd
new file mode 100644
index 0000000..515d565
--- /dev/null
+++ b/corecheck/tests/pth_once.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 40 bytes in 1 blocks.
+malloc/free: 11 allocs, 10 frees, 160 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/pth_once.stdout.exp b/corecheck/tests/pth_once.stdout.exp
new file mode 100644
index 0000000..97e25d1
--- /dev/null
+++ b/corecheck/tests/pth_once.stdout.exp
@@ -0,0 +1,22 @@
+welcome: Welcome
+identify_yourself: Hi, I'm thread # 0
+identify_yourself: Hi, I'm thread # 1
+identify_yourself: Hi, I'm thread # 2
+identify_yourself: Hi, I'm thread # 3
+identify_yourself: Hi, I'm thread # 4
+identify_yourself: Hi, I'm thread # 5
+identify_yourself: Hi, I'm thread # 6
+identify_yourself: Hi, I'm thread # 7
+identify_yourself: Hi, I'm thread # 8
+identify_yourself: Hi, I'm thread # 9
+main: joined to thread 0
+main: joined to thread 1
+main: joined to thread 2
+main: joined to thread 3
+main: joined to thread 4
+main: joined to thread 5
+main: joined to thread 6
+main: joined to thread 7
+main: joined to thread 8
+main: joined to thread 9
+main: Goodbye
diff --git a/corecheck/tests/pth_once.vgtest b/corecheck/tests/pth_once.vgtest
new file mode 100644
index 0000000..50bc5b4
--- /dev/null
+++ b/corecheck/tests/pth_once.vgtest
@@ -0,0 +1 @@
+prog: pth_once
diff --git a/corecheck/tests/sigkill.c b/corecheck/tests/sigkill.c
new file mode 100644
index 0000000..6c18d4b
--- /dev/null
+++ b/corecheck/tests/sigkill.c
@@ -0,0 +1,35 @@
+
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+
+static void
+abend (int sig)
+{
+  printf ("Abended on signal %d\n", sig);
+  exit (2);
+}
+
+int
+main (void)
+{
+  struct sigaction  sa;
+
+  int i;
+  for (i = 1; i <= 65; i++) {
+     sa.sa_flags   = 0;
+     sigemptyset( &sa.sa_mask );
+     sa.sa_handler = abend;
+     errno = 0;
+     fprintf(stderr,"setting signal %d: ", i);
+     sigaction (i /*SIGKILL*/, &sa, NULL);
+     perror ("");
+     errno = 0;
+     fprintf(stderr,"getting signal %d: ", i);
+     sigaction (i /*SIGKILL*/, NULL, &sa);
+     perror ("");
+     fprintf(stderr,"\n");
+  }
+  return 0;
+}
diff --git a/corecheck/tests/sigkill.stderr.exp b/corecheck/tests/sigkill.stderr.exp
new file mode 100644
index 0000000..563be09
--- /dev/null
+++ b/corecheck/tests/sigkill.stderr.exp
@@ -0,0 +1,202 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: attempt to set SIGKILL handler in __NR_sigaction.
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: attempt to set SIGSTOP handler in __NR_sigaction.
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 32: Success
+getting signal 32: Success
+
+setting signal 33: Success
+getting signal 33: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 63: Success
+getting signal 63: Success
+
+setting signal 64: Success
+getting signal 64: Success
+
+setting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+getting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/corecheck/tests/sigkill.stderr.exp.hd b/corecheck/tests/sigkill.stderr.exp.hd
new file mode 100644
index 0000000..69b457f
--- /dev/null
+++ b/corecheck/tests/sigkill.stderr.exp.hd
@@ -0,0 +1,206 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: attempt to set SIGKILL handler in __NR_sigaction.
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: attempt to set SIGSTOP handler in __NR_sigaction.
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 32: Success
+getting signal 32: Success
+
+setting signal 33: Success
+getting signal 33: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 63: Success
+getting signal 63: Success
+
+setting signal 64: Success
+getting signal 64: Success
+
+setting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+getting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/corecheck/tests/sigkill.vgtest b/corecheck/tests/sigkill.vgtest
new file mode 100644
index 0000000..a681430
--- /dev/null
+++ b/corecheck/tests/sigkill.vgtest
@@ -0,0 +1 @@
+prog: sigkill
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 60553dd..96911ed 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index 994cdb7..5972dfa 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ALL THIS CODE RUNS ON THE SIMULATED CPU.
@@ -257,6 +257,12 @@
    return 0;
 }
 
+int pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate)
+{
+   *detachstate = attr->__detachstate;
+   return 0;
+}
+
 int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit)
 {
    static int moans = N_MOANS;
@@ -1044,6 +1050,7 @@
 void __my_pthread_testcancel(void)
 {
    int res;
+   ensure_valgrind("__my_pthread_testcancel");
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__TESTCANCEL,
                            0, 0, 0, 0);
@@ -1178,7 +1185,7 @@
       if (n_now != n_orig) break;
 
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
+      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -1381,13 +1388,14 @@
 /* Relies on assumption that initial private data is NULL.  This
    should be fixed somehow. */
 
-/* The allowable keys (indices) (all 2 of them). 
+/* The allowable keys (indices) (all 3 of them). 
    From sysdeps/pthread/bits/libc-tsd.h
 */
-#define N_LIBC_TSD_EXTRA_KEYS 1
+#define N_LIBC_TSD_EXTRA_KEYS 0
 
 enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
                         _LIBC_TSD_KEY_DL_ERROR,
+                        _LIBC_TSD_KEY_RPC_VARS,
                         _LIBC_TSD_KEY_N };
 
 /* Auto-initialising subsystem.  libc_specifics_inited is set 
@@ -1877,6 +1885,10 @@
 }
 
 
+pid_t __vfork(void)
+{
+   return __fork();
+}
 
 
 /* ---------------------------------------------------------------------
@@ -1965,7 +1977,7 @@
    Basic idea is: modify the timeout parameter to select so that it
    returns immediately.  Poll like this until select returns non-zero,
    indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 20 milliseconds, which
+   Space out the polls with nanosleeps of say 11 milliseconds, which
    is required to be nonblocking; this allows other threads to run.  
 
    Assumes:
@@ -2083,7 +2095,7 @@
       /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */
+      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       res = my_do_syscall2(__NR_nanosleep, 
@@ -2193,7 +2205,7 @@
       /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */
+      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -2810,6 +2822,7 @@
 weak_alias (__pread64, pread64)
 weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
+weak_alias(__vfork, vfork)
 
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
diff --git a/coregrind/arch/x86-linux/vg_libpthread_unimp.c b/coregrind/arch/x86-linux/vg_libpthread_unimp.c
index f413887..f3938ec 100644
--- a/coregrind/arch/x86-linux/vg_libpthread_unimp.c
+++ b/coregrind/arch/x86-linux/vg_libpthread_unimp.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ---------------------------------------------------------------------
@@ -82,7 +82,7 @@
 //void longjmp ( void )  { unimp("longjmp"); }
 //void pthread_atfork ( void )  { unimp("pthread_atfork"); }
 //void pthread_attr_destroy ( void )  { unimp("pthread_attr_destroy"); }
-void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
+//void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
 void pthread_attr_getinheritsched ( void )  { unimp("pthread_attr_getinheritsched"); }
 //void pthread_attr_getschedparam ( void )  { unimp("pthread_attr_getschedparam"); }
 //void pthread_attr_getschedpolicy ( void )  { unimp("pthread_attr_getschedpolicy"); }
diff --git a/coregrind/arch/x86-linux/vg_syscall.S b/coregrind/arch/x86-linux/vg_syscall.S
index adabbed..52d6091 100644
--- a/coregrind/arch/x86-linux/vg_syscall.S
+++ b/coregrind/arch/x86-linux/vg_syscall.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/demangle/cp-demangle.c b/coregrind/demangle/cp-demangle.c
index 5cf99c8..8d91d29 100644
--- a/coregrind/demangle/cp-demangle.c
+++ b/coregrind/demangle/cp-demangle.c
@@ -46,9 +46,9 @@
 #include "demangle.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation,
@@ -1406,7 +1406,7 @@
   }
 
   if (base == 36) {
-     *value = VG_(atoll36) (dyn_string_buf (number));
+     *value = VG_(atoll36) (36, dyn_string_buf (number));
   } else {
      *value = VG_(atoll) (dyn_string_buf (number));
   }
diff --git a/coregrind/demangle/cplus-dem.c b/coregrind/demangle/cplus-dem.c
index 56c3261..959dbd3 100644
--- a/coregrind/demangle/cplus-dem.c
+++ b/coregrind/demangle/cplus-dem.c
@@ -70,17 +70,18 @@
 #endif
 
 #ifndef STANDALONE
-#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr)
-#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr)
-#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size)
-#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size)
+#define xstrdup(ptr)        VG_(arena_strdup) (VG_AR_DEMANGLE, ptr)
+#define free(ptr)           VG_(arena_free)   (VG_AR_DEMANGLE, ptr)
+#define xmalloc(size)       VG_(arena_malloc) (VG_AR_DEMANGLE, size)
+#define xrealloc(ptr, size) VG_(arena_realloc)(VG_AR_DEMANGLE, ptr, \
+                                               /*align*/4, size)
 #define abort() vg_assert(0)
 #undef strstr
-#define strstr VG_(strstr)
+#define strstr  VG_(strstr)
 #define sprintf VG_(sprintf)
 #define strncpy VG_(strncpy)
 #define strncat VG_(strncat)
-#define strchr VG_(strchr)
+#define strchr  VG_(strchr)
 #define strpbrk VG_(strpbrk)
 #endif
 
diff --git a/coregrind/demangle/dyn-string.c b/coregrind/demangle/dyn-string.c
index aaa7e36..d6130a1 100644
--- a/coregrind/demangle/dyn-string.c
+++ b/coregrind/demangle/dyn-string.c
@@ -36,9 +36,9 @@
 #include "dyn-string.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If this file is being compiled for inclusion in the C++ runtime
diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html
index b715ee3..95fe840 100644
--- a/coregrind/docs/manual.html
+++ b/coregrind/docs/manual.html
@@ -345,7 +345,7 @@
 </pre>
 
 <p>Note that Valgrind also reads options from the environment variable
-<code>$VALGRIND</code>, and processes them before the command-line
+<code>$VALGRIND_OPTS</code>, and processes them before the command-line
 options.
 
 <p>Valgrind's default settings succeed in giving reasonable behaviour
@@ -838,8 +838,8 @@
   <li>The contents of malloc'd blocks, before you write something
       there.  In C++, the new operator is a wrapper round malloc, so
       if you create an object with new, its fields will be
-      uninitialised until you fill them in, which is only Right and
-      Proper.</li>
+      uninitialised until you (or the constructor) fill them in, which
+      is only Right and Proper.</li>
 </ul>
 
 
@@ -1066,16 +1066,16 @@
       <p>
 
   <li>The "immediate location" specification.  For Value and Addr
-      errors, is either the name of the function in which the error
-      occurred, or, failing that, the full path the the .so file
-      containing the error location.  For Param errors, is the name of
-      the offending system call parameter.  For Free errors, is the
-      name of the function doing the freeing (eg, <code>free</code>,
-      <code>__builtin_vec_delete</code>, etc)</li><br>
+      errors, it is either the name of the function in which the error
+      occurred, or, failing that, the full path of the .so file or
+      executable containing the error location.  For Param errors,
+      is the name of the offending system call parameter.  For Free
+      errors, is the name of the function doing the freeing (eg,
+      <code>free</code>, <code>__builtin_vec_delete</code>, etc)</li><br>
       <p>
 
   <li>The caller of the above "immediate location".  Again, either a
-      function or shared-object name.</li><br>
+      function or shared-object/executable name.</li><br>
       <p>
 
   <li>Optionally, one or two extra calling-function or object names,
@@ -1083,8 +1083,8 @@
 </ul>
 
 <p>
-Locations may be either names of shared objects or wildcards matching
-function names.  They begin <code>obj:</code> and <code>fun:</code>
+Locations may be either names of shared objects/executables or wildcards
+matching function names.  They begin <code>obj:</code> and <code>fun:</code>
 respectively.  Function and object names to match against may use the 
 wildcard characters <code>*</code> and <code>?</code>.
 
@@ -1617,11 +1617,11 @@
 
   <li>If the new size is smaller, the dropped-off section is marked as
       unaddressible.  You may only pass to realloc a pointer
-      previously issued to you by malloc/calloc/new/realloc.</li><br>
+      previously issued to you by malloc/calloc/realloc.</li><br>
       <p>
 
   <li>free/delete: you may only pass to free a pointer previously
-      issued to you by malloc/calloc/new/realloc, or the value
+      issued to you by malloc/calloc/realloc, or the value
       NULL. Otherwise, Valgrind complains.  If the pointer is indeed
       valid, Valgrind marks the entire area it points at as
       unaddressible, and places the block in the freed-blocks-queue.
@@ -2058,7 +2058,9 @@
   <li>Run your program with <code>cachegrind</code> in front of the
       normal command line invocation.  When the program finishes,
       Valgrind will print summary cache statistics. It also collects
-      line-by-line information in a file <code>cachegrind.out</code>.
+      line-by-line information in a file
+      <code>cachegrind.out.<i>pid</i></code>, where <code><i>pid</i></code>
+      is the program's process id.
       <p>
       This step should be done every time you want to collect
       information about a new program, a changed program, or about the
@@ -2197,15 +2199,17 @@
 
 As well as printing summary information, Cachegrind also writes
 line-by-line cache profiling information to a file named
-<code>cachegrind.out</code>.  This file is human-readable, but is best
-interpreted by the accompanying program <code>vg_annotate</code>,
+<code>cachegrind.out.<i>pid</i></code>.  This file is human-readable, but is
+best interpreted by the accompanying program <code>vg_annotate</code>,
 described in the next section.
 <p>
-Things to note about the <code>cachegrind.out</code> file:
+Things to note about the <code>cachegrind.out.<i>pid</i></code> file:
 <ul>
   <li>It is written every time <code>valgrind --cachesim=yes</code> or
       <code>cachegrind</code> is run, and will overwrite any existing
-      <code>cachegrind.out</code> in the current directory.</li>
+      <code>cachegrind.out.<i>pid</i></code> in the current directory (but
+      that won't happen very often because it takes some time for process ids
+      to be recycled).</li>
   <p>
   <li>It can be huge: <code>ls -l</code> generates a file of about
       350KB.  Browsing a few files and web pages with a Konqueror
@@ -2213,6 +2217,13 @@
       of around 15 MB.</li>
 </ul>
 
+Note that older versions of Cachegrind used a log file named
+<code>cachegrind.out</code> (i.e. no <code><i>.pid</i></code> suffix).
+The suffix serves two purposes.  Firstly, it means you don't have to rename old
+log files that you don't want to overwrite.  Secondly, and more importantly,
+it allows correct profiling with the <code>--trace-children=yes</code> option
+of programs that spawn child processes.
+
 <a name="profileflags"></a>
 <h3>7.5&nbsp; Cachegrind options</h3>
 Cachegrind accepts all the options that Valgrind does, although some of them
@@ -2245,9 +2256,13 @@
 window to be at least 120-characters wide if possible, as the output
 lines can be quite long.
 <p>
-To get a function-by-function summary, run <code>vg_annotate</code> in
-directory containing a <code>cachegrind.out</code> file.  The output
-looks like this:
+To get a function-by-function summary, run <code>vg_annotate
+--<i>pid</i></code> in a directory containing a
+<code>cachegrind.out.<i>pid</i></code> file.  The <code>--<i>pid</i></code>
+is required so that <code>vg_annotate</code> knows which log file to use when
+several are present.
+<p>
+The output looks like this:
 
 <pre>
 --------------------------------------------------------------------------------
@@ -2468,8 +2483,9 @@
 specific enough.
 
 Beware that vg_annotate can take some time to digest large
-<code>cachegrind.out</code> files, eg. 30 seconds or more.  Also beware that
-auto-annotation can produce a lot of output if your program is large!
+<code>cachegrind.out.<i>pid</i></code> files, e.g. 30 seconds or more.  Also
+beware that auto-annotation can produce a lot of output if your program is
+large!
 
 
 <h3>7.7&nbsp; Annotating assembler programs</h3>
@@ -2492,13 +2508,18 @@
 
 <h3>7.8&nbsp; <code>vg_annotate</code> options</h3>
 <ul>
+  <li><code>--<i>pid</i></code></li><p>
+
+      Indicates which <code>cachegrind.out.<i>pid</i></code> file to read.
+      Not actually an option -- it is required.
+    
   <li><code>-h, --help</code></li><p>
   <li><code>-v, --version</code><p>
 
       Help and version, as usual.</li>
 
   <li><code>--sort=A,B,C</code> [default: order in 
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies the events upon which the sorting of the function-by-function
       entries will be based.  Useful if you want to concentrate on eg. I cache
       misses (<code>--sort=I1mr,I2mr</code>), or D cache misses
@@ -2506,10 +2527,10 @@
       (<code>--sort=D2mr,I2mr</code>).</li><p>
 
   <li><code>--show=A,B,C</code> [default: all, using order in
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies which events to show (and the column order). Default is to use
-      all present in the <code>cachegrind.out</code> file (and use the order in
-      the file).</li><p>
+      all present in the <code>cachegrind.out.<i>pid</i></code> file (and use
+      the order in the file).</li><p>
 
   <li><code>--threshold=X</code> [default: 99%] <p>
       Sets the threshold for the function-by-function summary.  Functions are
@@ -2547,17 +2568,18 @@
 There are a couple of situations in which vg_annotate issues warnings.
 
 <ul>
-  <li>If a source file is more recent than the <code>cachegrind.out</code>
-      file.  This is because the information in <code>cachegrind.out</code> is
-      only recorded with line numbers, so if the line numbers change at all in
-      the source (eg. lines added, deleted, swapped), any annotations will be 
+  <li>If a source file is more recent than the
+      <code>cachegrind.out.<i>pid</i></code> file.  This is because the
+      information in <code>cachegrind.out.<i>pid</i></code> is only recorded
+      with line numbers, so if the line numbers change at all in the source
+      (eg.  lines added, deleted, swapped), any annotations will be
       incorrect.<p>
 
   <li>If information is recorded about line numbers past the end of a file.
       This can be caused by the above problem, ie. shortening the source file
-      while using an old <code>cachegrind.out</code> file.  If this happens,
-      the figures for the bogus lines are printed anyway (clearly marked as
-      bogus) in case they are important.</li><p>
+      while using an old <code>cachegrind.out.<i>pid</i></code> file.  If this
+      happens, the figures for the bogus lines are printed anyway (clearly
+      marked as bogus) in case they are important.</li><p>
 </ul>
 
 
@@ -2677,6 +2699,13 @@
       <blockquote><code>btsl %eax, %edx</code></blockquote>
 
       This should only happen rarely.
+      </li><p>
+
+  <li>FPU instructions with data sizes of 28 and 108 bytes (e.g.
+      <code>fsave</code>) are treated as though they only access 16 bytes.
+      These instructions seem to be rare so hopefully this won't affect
+      accuracy much.
+      </li><p>
 </ul>
 
 Another thing worth noting is that results are very sensitive.  Changing the
diff --git a/coregrind/valgrind.in b/coregrind/valgrind.in
index 7b99277..4fee909 100755
--- a/coregrind/valgrind.in
+++ b/coregrind/valgrind.in
@@ -1,11 +1,37 @@
 #!/bin/sh
+##--------------------------------------------------------------------##
+##--- The startup script.                                 valgrind ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Valgrind, an x86 protected-mode emulator 
+#  designed for debugging and profiling binaries on x86-Unixes.
+#
+#  Copyright (C) 2002 Julian Seward
+#     jseward@acm.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
 
 # Should point to the installation directory
 prefix="@prefix@"
 exec_prefix="@exec_prefix@"
 VALGRIND="@libdir@/valgrind"
 
-
 # Other stuff ...
 version="@VERSION@"
 emailto="jseward@acm.org"
@@ -13,168 +39,57 @@
 # The default name of the suppressions file
 vgsupp="--suppressions=$VALGRIND/default.supp"
 
-# name we were invoked with
-vgname=`echo $0 | sed 's,^.*/,,'`
-
 # Valgrind options
 vgopts=
 
-# Prog and arg to run
-argopts=
+# --skin=<foo> arg, specifying skin used
+skin_arg=
 
-# Show usage info?
-dousage=0
-
-# show version info?
-doversion=0
-
-# Collect up args for Valgrind
+# Collect up args for Valgrind.  Only --version and --skin are intercepted 
+# here;  the rest are passed to vg_main.c.
 while [ $# != 0 ]
 do
   arg=$1
   case "$arg" in
-#   options for the user
-    --help)                 dousage=1; break;;
-    --version)              doversion=1; break;;
-    --logfile-fd=*)         vgopts="$vgopts $arg"; shift;;
-    -v)                     vgopts="$vgopts $arg"; shift;;
-    --verbose)              vgopts="$vgopts -v"; shift;;
-    -q)                     vgopts="$vgopts $arg"; shift;;
-    --quiet)                vgopts="$vgopts $arg"; shift;;
-    --error-limit=no)       vgopts="$vgopts $arg"; shift;;
-    --error-limit=yes)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=no)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=yes)     vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=no)        vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=yes)       vgopts="$vgopts $arg"; shift;;
-    --demangle=no)          vgopts="$vgopts $arg"; shift;;
-    --demangle=yes)         vgopts="$vgopts $arg"; shift;;
-    --num-callers=*)        vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=no)  vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;;
-    --leak-check=no)        vgopts="$vgopts $arg"; shift;;
-    --leak-check=yes)       vgopts="$vgopts $arg"; shift;;
-    --show-reachable=no)    vgopts="$vgopts $arg"; shift;;
-    --show-reachable=yes)   vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=low)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=med)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=high) vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=no)     vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=yes)    vgopts="$vgopts $arg"; shift;;
-    --alignment=*)          vgopts="$vgopts $arg"; shift;;
-    --trace-children=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-children=yes)   vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=no)    vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=yes)   vgopts="$vgopts $arg"; shift;;
-    --freelist-vol=*)       vgopts="$vgopts $arg"; shift;;
-    --suppressions=*)       vgopts="$vgopts $arg"; shift;;
-    --cachesim=yes)         vgopts="$vgopts $arg"; shift;;
-    --cachesim=no)          vgopts="$vgopts $arg"; shift;;
-    --I1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --D1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --L2=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --weird-hacks=*)        vgopts="$vgopts $arg"; shift;;
-#   options for debugging Valgrind
-    --sanity-level=*)       vgopts="$vgopts $arg"; shift;;
-    --single-step=yes)      vgopts="$vgopts $arg"; shift;;
-    --single-step=no)       vgopts="$vgopts $arg"; shift;;
-    --optimise=yes)         vgopts="$vgopts $arg"; shift;;
-    --optimise=no)          vgopts="$vgopts $arg"; shift;;
-    --instrument=yes)       vgopts="$vgopts $arg"; shift;;
-    --instrument=no)        vgopts="$vgopts $arg"; shift;;
-    --cleanup=yes)          vgopts="$vgopts $arg"; shift;;
-    --cleanup=no)           vgopts="$vgopts $arg"; shift;;
-    --smc-check=none)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=some)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=all)        vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=yes)   vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=yes)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=no)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=yes)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=no)       vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=none)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=some)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=all)    vgopts="$vgopts $arg"; shift;;
-    --stop-after=*)         vgopts="$vgopts $arg"; shift;;
-    --dump-error=*)         vgopts="$vgopts $arg"; shift;;
-    -*)                     dousage=1; break;;
+    --version)              echo "valgrind-$version"; exit 1 ;;
+    --skin=*)               skin_arg=$arg;            shift;;
+    -*)                     vgopts="$vgopts $arg";    shift;;
     *)                      break;;
   esac
 done
 
-if [ z"$doversion" = z1 ]; then
-   echo "valgrind-$version"
+
+# Decide on the skin.  Default to memory checking if not specified.
+if [ z"$skin_arg" = z ]; then
+   skin=memcheck
+else
+   # Hack off the "--skin=" prefix.
+   skin=`echo $skin_arg | sed 's/--skin=//'`
+fi
+
+# Setup skin shared object.
+skin_so="vgskin_${skin}.so"
+if [ ! -r $VALGRIND/$skin_so ] ; then
+   echo
+   echo "Extension error:"
+   echo "  The shared library \`$skin_so' for the chosen"
+   echo "  skin \`$skin' could not be found in"
+   echo "  $VALGRIND"
+   echo
    exit 1
 fi
 
-if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
-   echo
-   echo "usage: $vgname [options] prog-and-args"
-   echo
-   echo "  options for the user, with defaults in [ ], are:"
-   echo "    --help                    show this message"
-   echo "    --version                 show version"
-   echo "    -q --quiet                run silently; only print error msgs"
-   echo "    -v --verbose              be more verbose, incl counts of errors"
-   echo "    --gdb-attach=no|yes       start GDB when errors detected? [no]"
-   echo "    --demangle=no|yes         automatically demangle C++ names? [yes]"
-   echo "    --num-callers=<number>    show <num> callers in stack traces [4]"
-   echo "    --error-limit=no|yes      stop showing new errors if too many? [yes]"
-   echo "    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]"
-   echo "    --leak-check=no|yes       search for memory leaks at exit? [no]"
-   echo "    --leak-resolution=low|med|high"
-   echo "                              amount of bt merging in leak check [low]"
-   echo "    --show-reachable=no|yes   show reachable blocks in leak check? [no]"
-   echo "    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]"
-   echo "    --alignment=<number>      set minimum alignment of allocations [4]"
-   echo "    --trace-children=no|yes   Valgrind-ise child processes? [no]"
-   echo "    --logfile-fd=<number>     file descriptor for messages [2=stderr]"
-   echo "    --freelist-vol=<number>   volume of freed blocks queue [1000000]"
-   echo "    --workaround-gcc296-bugs=no|yes  self explanatory [no]"
-   echo "    --suppressions=<filename> suppress errors described in"
-   echo "                              suppressions file <filename>"
-   echo "    --check-addrVs=no|yes     experimental lighterweight checking? [yes]"
-   echo "                              yes == Valgrind's original behaviour"
-   echo "    --cachesim=no|yes         do cache profiling? [no]"
-   echo "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually"
-   echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
-   echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
-   echo "    --weird-hacks=hack1,hack2,...  [no hacks selected]"
-   echo "         recognised hacks are: ioctl-VTIME truncate-writes"
-   echo ""
-   echo
-   echo "  options for debugging Valgrind itself are:"
-   echo "    --sanity-level=<number>   level of sanity checking to do [1]"
-   echo "    --single-step=no|yes      translate each instr separately? [no]"
-   echo "    --optimise=no|yes         improve intermediate code? [yes]"
-   echo "    --instrument=no|yes       actually do memory checks? [yes]"
-   echo "    --cleanup=no|yes          improve after instrumentation? [yes]"
-   echo "    --smc-check=none|some|all check writes for s-m-c? [some]"
-   echo "    --trace-syscalls=no|yes   show all system calls? [no]"
-   echo "    --trace-signals=no|yes    show signal handling details? [no]"
-   echo "    --trace-symtab=no|yes     show symbol table details? [no]"
-   echo "    --trace-malloc=no|yes     show client malloc details? [no]"
-   echo "    --trace-sched=no|yes      show thread scheduler details? [no]"
-   echo "    --trace-pthread=none|some|all  show pthread event details? [no]"
-   echo "    --stop-after=<number>     switch to real CPU after executing"
-   echo "                              <number> basic blocks [infinity]"
-   echo "    --dump-error=<number>     show translation for basic block"
-   echo "                              associated with <number>'th"
-   echo "                              error context [0=don't show any]"
-   echo
-   echo "  Extra options are read from env variable \$VALGRIND_OPTS"
-   echo
-   echo "  Valgrind is Copyright (C) 2000-2002 Julian Seward"
-   echo "  and licensed under the GNU General Public License, version 2."
-   echo "  Bug reports, feedback, admiration, abuse, etc, to: $emailto."
-   echo
-   exit 1
+VG_CMD="$@"
+VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
+
+# If no command given, act like -h was given so vg_main.c prints out
+# the usage string.  And set VG_CMD to be any program, doesn't matter which
+# because it won't be run anyway (we use 'true').
+if [ z"$VG_CMD" = z ] ; then
+   VG_ARGS="$VG_ARGS -h"
+   VG_CMD=true
 fi
+export VG_ARGS
 
 # A bit subtle.  The LD_PRELOAD added entry must be absolute
 # and not depend on LD_LIBRARY_PATH.  This is so that we can
@@ -182,13 +97,19 @@
 # libpthread.so fall out of visibility, independently of
 # whether valgrind.so is visible.
 
-VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
-export VG_ARGS
 LD_LIBRARY_PATH=$VALGRIND:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH
-LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD
+
+# Insert skin .so before valgrind.so to override template functions.
+LD_PRELOAD=$VALGRIND/$skin_so:$VALGRIND/valgrind.so:$LD_PRELOAD
 export LD_PRELOAD
 #LD_DEBUG=files
 #LD_DEBUG=symbols
 #export LD_DEBUG
-exec "$@"
+
+exec $VG_CMD
+
+##--------------------------------------------------------------------##
+##--- end                                                 valgrind ---##
+##--------------------------------------------------------------------##
+
diff --git a/coregrind/vg_clientfuncs.c b/coregrind/vg_clientfuncs.c
index c71b6db..b37059b 100644
--- a/coregrind/vg_clientfuncs.c
+++ b/coregrind/vg_clientfuncs.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 #include "valgrind.h"   /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -72,7 +71,7 @@
    the real one, this is because the dynamic linker is running the
    static initialisers for C++, before starting up Valgrind itself.
    In this case it is safe to route calls through to
-   VG_(malloc)/vg_free, since that is self-initialising.
+   VG_(arena_malloc)/VG_(arena_free), since they are self-initialising.
 
    Once Valgrind is initialised, vg_running_on_simd_CPU becomes True.
    The call needs to be transferred from the simulated CPU back to the
@@ -91,15 +90,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to malloc()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to malloc()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__MALLOC, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -116,15 +116,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -147,15 +148,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_vec_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_vec_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -179,7 +181,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__FREE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);      
+      VG_(arena_free)(VG_AR_CLIENT, p);      
    }
 }
 
@@ -193,7 +195,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -213,7 +215,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -232,13 +234,14 @@
                   (UInt)VG_(running_on_simd_CPU), nmemb, size );
    if (nmemb < 0 || size < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
-                               nmemb, size );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
+                                  nmemb, size );
    } else {
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__CALLOC, nmemb, size);
       } else {
-         v = VG_(calloc)(VG_AR_CLIENT, nmemb, size);
+         v = VG_(arena_calloc)(VG_AR_CLIENT, nmemb, size);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -269,7 +272,7 @@
    if (VG_(running_on_simd_CPU)) {
       v = (void*)SIMPLE_REQUEST2(VG_USERREQ__REALLOC, ptrV, new_size);
    } else {
-      v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size);
+      v = VG_(arena_realloc)(VG_AR_CLIENT, ptrV, /*alignment*/4, new_size);
    }
    if (VG_(clo_trace_malloc)) 
       VG_(printf)(" = %p\n", v );
@@ -292,7 +295,7 @@
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__MEMALIGN, alignment, n);
       } else {
-         v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n);
+         v = VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -579,7 +582,7 @@
 {
    int res;
    extern void __libc_freeres(void);
-   __libc_freeres();
+   //__libc_freeres();
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__LIBC_FREERES_DONE, 0, 0, 0, 0);
    /*NOTREACHED*/
diff --git a/coregrind/vg_clientmalloc.c b/coregrind/vg_clientmalloc.c
index 0292aa4..0959843 100644
--- a/coregrind/vg_clientmalloc.c
+++ b/coregrind/vg_clientmalloc.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -38,15 +38,9 @@
 
 /* #define DEBUG_CLIENTMALLOC */
 
-/* Holds malloc'd but not freed blocks. */
+/* Holds malloc'd but not freed blocks.  Static, so zero-inited by default. */
 #define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS)
 static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS];
-static Bool         vg_client_malloc_init_done = False;
-
-/* Holds blocks after freeing. */
-static ShadowChunk* vg_freed_list_start   = NULL;
-static ShadowChunk* vg_freed_list_end     = NULL;
-static Int          vg_freed_list_volume  = 0;
 
 /* Stats ... */
 static UInt         vg_cmalloc_n_mallocs  = 0;
@@ -61,6 +55,105 @@
 /*--- Fns                                                  ---*/
 /*------------------------------------------------------------*/
 
+static __inline__
+Bool needs_shadow_chunks ( void )
+{
+   return VG_(needs).core_errors             ||
+          VG_(needs).alternative_free        ||
+          VG_(needs).sizeof_shadow_block > 0 ||
+          VG_(track_events).bad_free         ||
+          VG_(track_events).mismatched_free  ||
+          VG_(track_events).copy_mem_heap    ||
+          VG_(track_events).die_mem_heap;
+}
+
+#ifdef DEBUG_CLIENTMALLOC
+static 
+Int count_malloclists ( void )
+{
+   ShadowChunk* sc;
+   UInt ml_no;
+   Int  n = 0;
+
+   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
+      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
+         n++;
+   return n;
+}
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks, etc                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate a user-chunk of size bytes.  Also allocate its shadow
+   block, make the shadow block point at the user block.  Put the
+   shadow chunk on the appropriate list, and set all memory
+   protections correctly. */
+static void addShadowChunk ( ThreadState* tst,
+                             Addr p, UInt size, VgAllocKind kind )
+{
+   ShadowChunk* sc;
+   UInt         ml_no = VG_MALLOCLIST_NO(p);
+
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] addShadowChunk "
+               "( sz %d, addr %p, list %d )\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, p, ml_no );
+#  endif
+
+   sc = VG_(arena_malloc)(VG_AR_CORE, 
+                          sizeof(ShadowChunk)
+                           + VG_(needs).sizeof_shadow_block);
+   sc->size      = size;
+   sc->allockind = kind;
+   sc->data      = p;
+   /* Fill in any skin-specific shadow chunk stuff */
+   if (VG_(needs).sizeof_shadow_block > 0)
+      SK_(complete_shadow_chunk) ( sc, tst );
+
+   sc->next  = vg_malloclist[ml_no];
+   vg_malloclist[ml_no] = sc;
+}
+
+/* Get the sc, and return the address of the previous node's next pointer
+   which allows sc to be removed from the list later without having to look
+   it up again.  */
+static ShadowChunk* getShadowChunk ( Addr a, /*OUT*/ShadowChunk*** next_ptr )
+{
+   ShadowChunk *prev, *curr;
+   Int ml_no;
+   
+   ml_no = VG_MALLOCLIST_NO(a);
+
+   prev = NULL;
+   curr = vg_malloclist[ml_no];
+   while (True) {
+      if (curr == NULL) 
+         break;
+      if (a == curr->data)
+         break;
+      prev = curr;
+      curr = curr->next;
+   }
+
+   if (NULL == prev)
+      *next_ptr = &vg_malloclist[ml_no];
+   else
+      *next_ptr = &prev->next;
+
+   return curr;
+}
+
+void VG_(freeShadowChunk) ( ShadowChunk* sc )
+{
+   VG_(arena_free) ( VG_AR_CLIENT, (void*)sc->data );
+   VG_(arena_free) ( VG_AR_CORE,   sc );
+}
+
+
 /* Allocate a suitably-sized array, copy all the malloc-d block
    shadows into it, and return both the array and the size of it.
    This is used by the memory-leak detector.
@@ -78,8 +171,7 @@
    }
    if (*n_shadows == 0) return NULL;
 
-   arr = VG_(malloc)( VG_AR_PRIVATE, 
-                      *n_shadows * sizeof(ShadowChunk*) );
+   arr = VG_(malloc)( *n_shadows * sizeof(ShadowChunk*) );
 
    i = 0;
    for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) {
@@ -91,405 +183,284 @@
    return arr;
 }
 
-static void client_malloc_init ( void )
+Bool VG_(addr_is_in_block)( Addr a, Addr start, UInt size )
+{
+   return (start - VG_AR_CLIENT_REDZONE_SZB <= a
+           && a < start + size + VG_AR_CLIENT_REDZONE_SZB);
+}
+
+/* Return the first shadow chunk satisfying the predicate p. */
+ShadowChunk* VG_(any_matching_mallocd_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
 {
    UInt ml_no;
-   if (vg_client_malloc_init_done) return;
+   ShadowChunk* sc;
+
    for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++)
-      vg_malloclist[ml_no] = NULL;
-   vg_client_malloc_init_done = True;
-}
-
-
-static __attribute__ ((unused))
-       Int count_freelist ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n++;
-   return n;
-}
-
-static __attribute__ ((unused))
-       Int count_malloclists ( void )
-{
-   ShadowChunk* sc;
-   UInt ml_no;
-   Int  n = 0;
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
       for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
-         n++;
-   return n;
-}
+         if (p(sc))
+            return sc;
 
-static __attribute__ ((unused))
-       void freelist_sanity ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   /* VG_(printf)("freelist sanity\n"); */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n += sc->size;
-   vg_assert(n == vg_freed_list_volume);
-}
-
-/* Remove sc from malloc list # sc.  It is an unchecked error for
-   sc not to be present in the list. 
-*/
-static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc )
-{
-   ShadowChunk *sc1, *sc2;
-   if (sc == vg_malloclist[ml_no]) {
-      vg_malloclist[ml_no] = vg_malloclist[ml_no]->next;
-   } else {
-      sc1 = vg_malloclist[ml_no];
-      vg_assert(sc1 != NULL);
-      sc2 = sc1->next;
-      while (sc2 != sc) {
-         vg_assert(sc2 != NULL);
-         sc1 = sc2;
-         sc2 = sc2->next;
-      }
-      vg_assert(sc1->next == sc);
-      vg_assert(sc2 == sc);
-      sc1->next = sc2->next;
-   }
+   return NULL;
 }
 
 
-/* Put a shadow chunk on the freed blocks queue, possibly freeing up
-   some of the oldest blocks in the queue at the same time. */
-
-static void add_to_freed_queue ( ShadowChunk* sc )
-{
-   ShadowChunk* sc1;
-
-   /* Put it at the end of the freed list */
-   if (vg_freed_list_end == NULL) {
-      vg_assert(vg_freed_list_start == NULL);
-      vg_freed_list_end = vg_freed_list_start = sc;
-      vg_freed_list_volume = sc->size;
-   } else {
-      vg_assert(vg_freed_list_end->next == NULL);
-      vg_freed_list_end->next = sc;
-      vg_freed_list_end = sc;
-      vg_freed_list_volume += sc->size;
-   }
-   sc->next = NULL;
-
-   /* Release enough of the oldest blocks to bring the free queue
-      volume below vg_clo_freelist_vol. */
-
-   while (vg_freed_list_volume > VG_(clo_freelist_vol)) {
-      /* freelist_sanity(); */
-      vg_assert(vg_freed_list_start != NULL);
-      vg_assert(vg_freed_list_end != NULL);
-
-      sc1 = vg_freed_list_start;
-      vg_freed_list_volume -= sc1->size;
-      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
-      vg_assert(vg_freed_list_volume >= 0);
-
-      if (vg_freed_list_start == vg_freed_list_end) {
-         vg_freed_list_start = vg_freed_list_end = NULL;
-      } else {
-         vg_freed_list_start = sc1->next;
-      }
-      sc1->next = NULL; /* just paranoia */
-      VG_(free)(VG_AR_CLIENT,  (void*)(sc1->data));
-      VG_(free)(VG_AR_PRIVATE, sc1);
-   }
-}
-
-
-/* Allocate a user-chunk of size bytes.  Also allocate its shadow
-   block, make the shadow block point at the user block.  Put the
-   shadow chunk on the appropriate list, and set all memory
-   protections correctly. */
-
-static ShadowChunk* client_malloc_shadow ( ThreadState* tst,
-                                           UInt align, UInt size, 
-                                           VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         ml_no;
-
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
-#  endif
-
-   vg_assert(align >= 4);
-   if (align == 4)
-      p = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   else
-      p = (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size);
-
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = kind;
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_writable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
-   return sc;
-}
-
+/*------------------------------------------------------------*/
+/*--- client_malloc(), etc                                 ---*/
+/*------------------------------------------------------------*/
 
 /* Allocate memory, noticing whether or not we are doing the full
    instrumentation thing. */
-
-void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+static __inline__
+void* alloc_and_new_mem ( ThreadState* tst, UInt size, UInt alignment,
+                          Bool is_zeroed, VgAllocKind kind )
 {
-   ShadowChunk* sc;
+   Addr p;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               size, raw_alloc_kind );
-#  endif
 
    vg_cmalloc_n_mallocs ++;
    vg_cmalloc_bs_mallocd += size;
 
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc) ( VG_AR_CLIENT, size );
-   }
+   vg_assert(alignment >= 4);
+   if (alignment == 4)
+      p = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, size);
+   else
+      p = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, size);
 
-   sc = client_malloc_shadow ( tst, VG_(clo_alignment), size, kind );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   if (needs_shadow_chunks())
+      addShadowChunk ( tst, p, size, kind );
+
+   VG_TRACK( ban_mem_heap, p-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( new_mem_heap, p, size, is_zeroed );
+   VG_TRACK( ban_mem_heap, p+size, VG_AR_CLIENT_REDZONE_SZB );
+
+   VGP_POPCC(VgpCliMalloc);
+   return (void*)p;
+}
+
+void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+{
+   void* p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                                 /*is_zeroed*/False, kind );
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, kind, p );
+#  endif
+   return p;
 }
 
 
 void* VG_(client_memalign) ( ThreadState* tst, UInt align, UInt size )
 {
-   ShadowChunk* sc;
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   void* p = alloc_and_new_mem ( tst, size, align, 
+                                 /*is_zeroed*/False, Vg_AllocMalloc );
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d ) = %p\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               align, size, p );
 #  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size );
-   }
-   sc = client_malloc_shadow ( tst, align, size, Vg_AllocMalloc );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   return p;
 }
 
 
-void VG_(client_free) ( ThreadState* tst, void* ptrV, VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, raw_alloc_kind );
-#  endif
-
-   vg_cmalloc_n_frees ++;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      VG_(free) ( VG_AR_CLIENT, ptrV );
-      return;
-   }
-
-   /* first, see if ptrV is one vg_client_malloc gave out. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   vg_mlist_frees++;
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      vg_mlist_tries++;
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      VGP_POPCC;
-      return;
-   }
-
-   /* check if its a matching free() / delete / delete [] */
-   if (kind != sc->allockind)
-      VG_(record_freemismatch_error) ( tst, (Addr) ptrV );
-
-   /* Remove the shadow chunk from the mallocd list. */
-   remove_from_malloclist ( ml_no, sc );
-
-   /* Declare it inaccessible. */
-   VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                         sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-   VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-
-   /* Put it out of harm's way for a while. */
-   add_to_freed_queue ( sc );
-   VGP_POPCC;
-}
-
-
-
 void* VG_(client_calloc) ( ThreadState* tst, UInt nmemb, UInt size1 )
 {
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         size, i, ml_no;
+   void*        p;
+   UInt         size, i;
 
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   size = nmemb * size1;
 
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               nmemb, size1 );
-#  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += nmemb * size1;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 );
-   }
-
-   size      = nmemb * size1;
-   p         = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = Vg_AllocMalloc; /* its a lie - but true. eat this :) */
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_readable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
+   p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                              /*is_zeroed*/True, Vg_AllocMalloc );
+   /* Must zero block for calloc! */
    for (i = 0; i < size; i++) ((UChar*)p)[i] = 0;
 
-   VGP_POPCC;
-   return (void*)p;
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               nmemb, size1, p );
+#  endif
+
+   return p;
+}
+
+static
+void die_and_free_mem ( ThreadState* tst, ShadowChunk* sc,
+                        ShadowChunk** prev_chunks_next_ptr )
+{
+   /* Note: ban redzones again -- just in case user de-banned them
+      with a client request... */
+   VG_TRACK( ban_mem_heap, sc->data-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( die_mem_heap, sc->data, sc->size );
+   VG_TRACK( ban_mem_heap, sc->data+sc->size, VG_AR_CLIENT_REDZONE_SZB );
+
+   /* Remove sc from the malloclist using prev_chunks_next_ptr to
+      avoid repeating the hash table lookup.  Can't remove until at least
+      after free and free_mismatch errors are done because they use
+      describe_addr() which looks for it in malloclist. */
+   *prev_chunks_next_ptr = sc->next;
+
+   if (VG_(needs).alternative_free)
+      SK_(alt_free) ( sc, tst );
+   else
+      VG_(freeShadowChunk) ( sc );
 }
 
 
-void* VG_(client_realloc) ( ThreadState* tst, void* ptrV, UInt size_new )
+void VG_(client_free) ( ThreadState* tst, void* p, VgAllocKind kind )
 {
-   ShadowChunk *sc, *sc_new;
-   UInt         i, ml_no;
+   ShadowChunk*  sc;
+   ShadowChunk** prev_chunks_next_ptr;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
 
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, size_new );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               p, kind );
 #  endif
 
    vg_cmalloc_n_frees ++;
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size_new;
 
-   if (!VG_(clo_instrument)) {
-      vg_assert(ptrV != NULL && size_new != 0);
-      VGP_POPCC;
-      return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new );
-   }
+   if (! needs_shadow_chunks()) {
+      VG_(arena_free) ( VG_AR_CLIENT, p );
 
-   /* First try and find the block. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-  
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      /* Perhaps we should keep going regardless. */
-      VGP_POPCC;
-      return NULL;
-   }
-
-   if (sc->allockind != Vg_AllocMalloc) {
-      /* can not realloc a range that was allocated with new or new [] */
-      VG_(record_freemismatch_error) ( tst, (Addr)ptrV );
-      /* but keep going anyway */
-   }
-
-   if (sc->size == size_new) {
-      /* size unchanged */
-      VGP_POPCC;
-      return ptrV;
-   }
-   if (sc->size > size_new) {
-      /* new size is smaller */
-      VGM_(make_noaccess)( sc->data + size_new, 
-                           sc->size - size_new );
-      sc->size = size_new;
-      VGP_POPCC;
-      return ptrV;
    } else {
-      /* new size is bigger */
-      sc_new = client_malloc_shadow ( tst, VG_(clo_alignment), 
-                                      size_new, Vg_AllocMalloc );
-      for (i = 0; i < sc->size; i++)
-         ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i];
-      VGM_(copy_address_range_perms) ( 
-         sc->data, sc_new->data, sc->size );
-      remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc );
-      VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                            sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-      VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-      add_to_freed_queue ( sc );
-      VGP_POPCC;
-      return (void*)sc_new->data;
-   }  
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         VGP_POPCC(VgpCliMalloc);
+         return;
+      }
+
+      /* check if its a matching free() / delete / delete [] */
+      if (kind != sc->allockind)
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+
+      die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+   } 
+   VGP_POPCC(VgpCliMalloc);
 }
 
 
-void VG_(clientmalloc_done) ( void )
+void* VG_(client_realloc) ( ThreadState* tst, void* p, UInt new_size )
+{
+   ShadowChunk  *sc;
+   ShadowChunk **prev_chunks_next_ptr;
+   UInt          i;
+
+   VGP_PUSHCC(VgpCliMalloc);
+
+   vg_cmalloc_n_frees ++;
+   vg_cmalloc_n_mallocs ++;
+   vg_cmalloc_bs_mallocd += new_size;
+
+   if (! needs_shadow_chunks()) {
+      vg_assert(p != NULL && new_size != 0);
+      p = VG_(arena_realloc) ( VG_AR_CLIENT, p, VG_(clo_alignment), 
+                               new_size );
+      VGP_POPCC(VgpCliMalloc);
+      return p;
+
+   } else {
+      /* First try and find the block. */
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         /* Perhaps we should return to the program regardless. */
+         VGP_POPCC(VgpCliMalloc);
+         return NULL;
+      }
+     
+      /* check if its a matching free() / delete / delete [] */
+      if (Vg_AllocMalloc != sc->allockind) {
+         /* can not realloc a range that was allocated with new or new [] */
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+         /* but keep going anyway */
+      }
+
+      if (sc->size == new_size) {
+         /* size unchanged */
+         VGP_POPCC(VgpCliMalloc);
+         return p;
+         
+      } else if (sc->size > new_size) {
+         /* new size is smaller */
+         VG_TRACK( die_mem_heap, sc->data+new_size, sc->size-new_size );
+         sc->size = new_size;
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_smaller ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, p );
+#        endif
+         return p;
+
+      } else {
+         /* new size is bigger */
+         Addr p_new;
+         
+         /* Get new memory */
+         vg_assert(VG_(clo_alignment) >= 4);
+         if (VG_(clo_alignment) == 4)
+            p_new = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, new_size);
+         else
+            p_new = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, 
+                                            VG_(clo_alignment), new_size);
+
+         /* First half kept and copied, second half new, 
+            red zones as normal */
+         VG_TRACK( ban_mem_heap, p_new-VG_AR_CLIENT_REDZONE_SZB, 
+                                 VG_AR_CLIENT_REDZONE_SZB );
+         VG_TRACK( copy_mem_heap, (Addr)p, p_new, sc->size );
+         VG_TRACK( new_mem_heap, p_new+sc->size, new_size-sc->size, 
+                   /*inited=*/False );
+         VG_TRACK( ban_mem_heap, p_new+new_size, VG_AR_CLIENT_REDZONE_SZB );
+
+         /* Copy from old to new */
+         for (i = 0; i < sc->size; i++)
+            ((UChar*)p_new)[i] = ((UChar*)p)[i];
+
+         /* Free old memory */
+         die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+
+         /* this has to be after die_and_free_mem, otherwise the
+            former succeeds in shorting out the new block, not the
+            old, in the case when both are on the same list.  */
+         addShadowChunk ( tst, p_new, new_size, Vg_AllocMalloc );
+
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_bigger ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, (void*)p_new );
+#        endif
+         return (void*)p_new;
+      }  
+   }
+}
+
+void VG_(print_malloc_stats) ( void )
 {
    UInt         nblocks, nbytes, ml_no;
    ShadowChunk* sc;
 
-   client_malloc_init();
+   if (VG_(clo_verbosity) == 0)
+      return;
+
+   vg_assert(needs_shadow_chunks());
 
    nblocks = nbytes = 0;
 
@@ -500,9 +471,6 @@
       }
    }
 
-   if (VG_(clo_verbosity) == 0)
-     return;
-
    VG_(message)(Vg_UserMsg, 
                 "malloc/free: in use at exit: %d bytes in %d blocks.",
                 nbytes, nblocks);
@@ -510,9 +478,6 @@
                 "malloc/free: %d allocs, %d frees, %d bytes allocated.",
                 vg_cmalloc_n_mallocs,
                 vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd);
-   if (!VG_(clo_leak_check))
-      VG_(message)(Vg_UserMsg, 
-                   "For a detailed leak analysis,  rerun with: --leak-check=yes");
    if (0)
       VG_(message)(Vg_DebugMsg,
                    "free search: %d tries, %d frees", 
@@ -522,58 +487,6 @@
       VG_(message)(Vg_UserMsg, "");
 }
 
-
-/* Describe an address as best you can, for error messages,
-   putting the result in ai. */
-
-void VG_(describe_addr) ( Addr a, AddrInfo* ai )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-   Bool         ok;
-   ThreadId     tid;
-
-   /* Perhaps it's a user-def'd block ? */
-   ok = VG_(client_perm_maybe_describe)( a, ai );
-   if (ok)
-      return;
-   /* Perhaps it's on a thread's stack? */
-   tid = VG_(identify_stack_addr)(a);
-   if (tid != VG_INVALID_THREADID) {
-      ai->akind     = Stack;
-      ai->stack_tid = tid;
-      return;
-   }
-   /* Search for a freed block which might bracket it. */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) {
-      if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-          && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-         ai->akind      = Freed;
-         ai->blksize    = sc->size;
-         ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-         ai->lastchange = sc->where;
-         return;
-      }
-   }
-   /* Search for a mallocd block which might bracket it. */
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) {
-      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-         if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-             && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-            ai->akind      = Mallocd;
-            ai->blksize    = sc->size;
-            ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-            ai->lastchange = sc->where;
-            return;
-         }
-      }
-   }
-   /* Clueless ... */
-   ai->akind = Unknown;
-   return;
-}
-
-
 /*--------------------------------------------------------------------*/
 /*--- end                                        vg_clientmalloc.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index d3da14b..abf7240 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -26,30 +26,17 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_CONSTANTS_H
 #define __VG_CONSTANTS_H
 
+#include "vg_constants_skin.h"
 
 /* This file is included in all Valgrind source files, including
    assembly ones. */
 
-/* All symbols externally visible from valgrind.so are prefixed
-   as specified here.  The prefix can be changed, so as to avoid
-   namespace conflict problems.
-*/
-#define VGAPPEND(str1,str2) str1##str2
-
-/* These macros should add different prefixes so the same base
-   name can safely be used across different macros. */
-#define VG_(str)    VGAPPEND(vgPlain_,str)
-#define VGM_(str)   VGAPPEND(vgMem_,str)
-#define VGP_(str)   VGAPPEND(vgProf_,str)
-#define VGOFF_(str) VGAPPEND(vgOff_,str)
-
-
 /* Magic values that %ebp might be set to when returning to the
    dispatcher.  The only other legitimate value is to point to the
    start of VG_(baseBlock).  These also are return values from
@@ -59,13 +46,12 @@
    returns to the dispatch loop.  TRC means that this value is a valid
    thread return code, which the dispatch loop may return to the
    scheduler.  */
-#define VG_TRC_EBP_JMP_STKADJ     17 /* EBP only; handled by dispatcher */
 #define VG_TRC_EBP_JMP_SYSCALL    19 /* EBP and TRC */
 #define VG_TRC_EBP_JMP_CLIENTREQ  23 /* EBP and TRC */
 
-#define VG_TRC_INNER_COUNTERZERO  29  /* TRC only; means bb ctr == 0 */
-#define VG_TRC_INNER_FASTMISS     31  /* TRC only; means fast-cache miss. */
-#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* TRC only; got sigsegv/sigbus */
+#define VG_TRC_INNER_FASTMISS     31 /* TRC only; means fast-cache miss. */
+#define VG_TRC_INNER_COUNTERZERO  29 /* TRC only; means bb ctr == 0 */
+#define VG_TRC_UNRESUMABLE_SIGNAL 37 /* TRC only; got sigsegv/sigbus */
 
 
 /* Debugging hack for assembly code ... sigh. */
@@ -93,7 +79,7 @@
 /* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 
-#endif /* ndef __VG_INCLUDE_H */
+#endif /* ndef __VG_CONSTANTS_H */
 
 /*--------------------------------------------------------------------*/
 /*--- end                                           vg_constants.h ---*/
diff --git a/coregrind/vg_default.c b/coregrind/vg_default.c
new file mode 100644
index 0000000..a4b52ea
--- /dev/null
+++ b/coregrind/vg_default.c
@@ -0,0 +1,249 @@
+/*--------------------------------------------------------------------*/
+/*--- Default panicky definitions of template functions that skins ---*/
+/*--- should override.                                             ---*/
+/*---                                                vg_defaults.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+/* These functions aren't intended to be run.  Replacement functions used by
+ * the chosen skin are substituted by compiling the skin into a .so and
+ * LD_PRELOADing it.  Nasty :) */
+
+#include "vg_include.h"
+
+/* ---------------------------------------------------------------------
+   Error messages (for malformed skins)
+   ------------------------------------------------------------------ */
+
+/* If the skin fails to define one or more of the required functions,
+ * make it very clear what went wrong! */
+
+static __attribute__ ((noreturn))
+void fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s',\n"
+      "  which is required.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+static __attribute__ ((noreturn))
+void non_fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s'\n"
+      "  required by one of its needs.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+/* ---------------------------------------------------------------------
+   Fundamental template functions
+   ------------------------------------------------------------------ */
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   fund_panic("SK_(pre_clo_init)");
+}
+
+void SK_(post_clo_init)(void)
+{
+   fund_panic("SK_(post_clo_init)");
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr not_used)
+{
+   fund_panic("SK_(instrument)");
+}
+
+void SK_(fini)(void)
+{
+   fund_panic("SK_(fini)");
+}
+
+/* ---------------------------------------------------------------------
+   For error reporting and suppression handling
+   ------------------------------------------------------------------ */
+
+Bool SK_(eq_SkinError)(VgRes res, SkinError* e1, SkinError* e2)
+{
+   non_fund_panic("SK_(eq_SkinError)");
+}
+
+void SK_(pp_SkinError)(SkinError* ec, void (*pp_ExeContext)(void))
+{
+   non_fund_panic("SK_(pp_SkinError)");
+}
+
+void SK_(dup_extra_and_update)(SkinError* ec)
+{
+   non_fund_panic("SK_(dup_extra_and_update)");
+}
+
+Bool SK_(recognised_suppression)(Char* name, SuppKind* skind)
+{
+   non_fund_panic("SK_(recognised_suppression)");
+}
+
+Bool SK_(read_extra_suppression_info)(Int fd, Char* buf, 
+                                       Int nBuf, SkinSupp *s)
+{
+   non_fund_panic("SK_(read_extra_suppression_info)");
+}
+
+Bool SK_(error_matches_suppression)(SkinError* ec, SkinSupp* su)
+{
+   non_fund_panic("SK_(error_matches_suppression)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(discard_basic_block_info)(Addr a, UInt size)
+{
+   non_fund_panic("SK_(discard_basic_block_info)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(written_shadow_regs_values)(UInt* gen_reg, UInt* eflags)
+{
+   non_fund_panic("SK_(written_shadow_regs_values)");
+}
+
+
+/* ---------------------------------------------------------------------
+   Command line arg template function
+   ------------------------------------------------------------------ */
+
+Bool SK_(process_cmd_line_option)(Char* argv)
+{
+   non_fund_panic("SK_(process_cmd_line_option)");
+}
+
+Char* SK_(usage)(void)
+{
+   non_fund_panic("SK_(usage)");
+}
+
+/* ---------------------------------------------------------------------
+   Client request template function
+   ------------------------------------------------------------------ */
+
+UInt SK_(handle_client_request)(ThreadState* tst, UInt* arg_block)
+{
+   non_fund_panic("SK_(handle_client_request)");
+}
+
+/* ---------------------------------------------------------------------
+   UCode extension
+   ------------------------------------------------------------------ */
+
+void SK_(emitExtUInstr)(UInstr* u, RRegSet regs_live_before)
+{
+   non_fund_panic("SK_(emitExtUInstr)");
+}
+
+Bool SK_(saneExtUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u)
+{
+   non_fund_panic("SK_(saneExtUInstr)");
+}
+
+Char* SK_(nameExtUOpcode)(Opcode opc)
+{
+   non_fund_panic("SK_(nameExtUOpcode)");
+}
+
+void SK_(ppExtUInstr)(UInstr* u)
+{
+   non_fund_panic("SK_(ppExtUInstr)");
+}
+
+Int SK_(getExtRegUsage)(UInstr* u, Tag tag, RegUse* arr)
+{
+   non_fund_panic("SK_(getExtTempUsage)");
+}
+
+/* ---------------------------------------------------------------------
+   Syscall wrapping
+   ------------------------------------------------------------------ */
+
+void* SK_(pre_syscall)(ThreadId tid, UInt syscallno, Bool is_blocking)
+{
+   non_fund_panic("SK_(pre_syscall)");
+}
+
+void  SK_(post_syscall)(ThreadId tid, UInt syscallno,
+                         void* pre_result, Int res, Bool is_blocking)
+{
+   non_fund_panic("SK_(post_syscall)");
+}
+
+/* ---------------------------------------------------------------------
+   Shadow chunks
+   ------------------------------------------------------------------ */
+
+void SK_(complete_shadow_chunk)( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(complete_shadow_chunk)");
+}
+
+/* ---------------------------------------------------------------------
+   Alternative free()
+   ------------------------------------------------------------------ */
+
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(alt_free)");
+}
+
+/* ---------------------------------------------------------------------
+   Sanity checks
+   ------------------------------------------------------------------ */
+
+Bool SK_(cheap_sanity_check)(void)
+{
+   non_fund_panic("SK_(cheap_sanity_check)");
+}
+
+Bool SK_(expensive_sanity_check)(void)
+{
+   non_fund_panic("SK_(expensive_sanity_check)");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_defaults.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_demangle.c b/coregrind/vg_demangle.c
index f07f7f3..6dff76f 100644
--- a/coregrind/vg_demangle.c
+++ b/coregrind/vg_demangle.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -49,12 +49,14 @@
    Int   n_result  = 0;
    Char* demangled = NULL;
 
+   VGP_PUSHCC(VgpDemangle);
+
    if (VG_(clo_demangle))
       demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
 
    if (demangled) {
       ADD_TO_RESULT(demangled, VG_(strlen)(demangled));
-      VG_(free) (VG_AR_DEMANGLE, demangled);
+      VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    } else {
       ADD_TO_RESULT(orig, VG_(strlen)(orig));
    }
@@ -65,6 +67,8 @@
    vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE));
 
    /* VG_(show_all_arena_stats)(); */
+
+   VGP_POPCC(VgpDemangle);
 }
 
 
diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S
index bd1c5b9..7cdb209 100644
--- a/coregrind/vg_dispatch.S
+++ b/coregrind/vg_dispatch.S
@@ -1,8 +1,8 @@
 
-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address.       ---##
-##---                                                vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                                vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
 
 /*
   This file is part of Valgrind, an x86 protected-mode emulator 
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -59,9 +59,9 @@
 	
 .globl VG_(run_innerloop)
 VG_(run_innerloop):
-	#OYNK(1000)
+	/* OYNK(1000) */
 
-	# ----- entry point to VG_(run_innerloop) -----
+	/* ----- entry point to VG_(run_innerloop) ----- */
 	pushl	%ebx
 	pushl	%ecx
 	pushl	%edx
@@ -69,74 +69,98 @@
 	pushl	%edi
 	pushl	%ebp
 
-	# Set up the baseBlock pointer
+	/* Set up the baseBlock pointer */
 	movl	$VG_(baseBlock), %ebp
 
-	# fetch m_eip into %eax
+	/* fetch m_eip into %eax */
 	movl	VGOFF_(m_eip), %esi
 	movl	(%ebp, %esi, 4), %eax
 	
-	# Start off dispatching paranoically, since we no longer have
-	# any indication whether or not this might be a special call/ret
-	# transfer.
-	jmp	dispatch_stkadj
-	
-	
 dispatch_main:
-	# Jump here to do a new dispatch.
-	# %eax holds destination (original) address.
-	# %ebp indicates further details of the control transfer
-	# requested to the address in %eax.
-	#
-	# If ebp == & VG_(baseBlock), just jump next to %eax.
-	# 
-	# If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
-	# continuing at eax.
-	#
-	# If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
-	# continuing at eax.
-	#
-	# If %ebp has any other value, we panic.
+	/* Jump here to do a new dispatch.
+	   %eax holds destination (original) address.
+	   %ebp indicates further details of the control transfer
+	   requested to the address in %eax.
 	
+	   If ebp == & VG_(baseBlock), just jump next to %eax.
+	 
+	   If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
+	   continuing at eax.
+	
+	   If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
+	   continuing at eax.
+	
+	   If %ebp has any other value, we panic.
+	*/
+	cmpl	$VG_(baseBlock), %ebp
+	jnz	dispatch_exceptional
+	/* fall into main loop */
+
+
+dispatch_boring:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
+	movl	VGOFF_(m_eip), %esi
+	movl	%eax, (%ebp, %esi, 4)
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+	decl	VG_(dispatch_ctr)
+	jz	counter_is_zero
+	/* try a fast lookup in the translation cache */
+	movl	%eax, %ebx
+	andl	$VG_TT_FAST_MASK, %ebx	
+	/* ebx = tt_fast index */
+	movl	VG_(tt_fast)(,%ebx,4), %ebx	
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
+	cmpl	%eax, (%ebx)
+	jnz	fast_lookup_failed
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
+	movl	VG_(current_epoch), %ecx
+	movl	%ecx, 8(%ebx)
+#endif
+	call	*4(%ebx)
 	cmpl	$VG_(baseBlock), %ebp
 	jnz	dispatch_exceptional
 
-dispatch_boring:
-	# save the jump address at VG_(baseBlock)[VGOFF_(m_eip)],
+dispatch_boring_unroll2:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
-	
-	# do a timeslice check.
-	# are we out of timeslice?  If yes, defer to scheduler.
-	#OYNK(1001)
+#if 1
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
 	decl	VG_(dispatch_ctr)
 	jz	counter_is_zero
-
-	#OYNK(1002)
-	# try a fast lookup in the translation cache
+#endif
+	/* try a fast lookup in the translation cache */
 	movl	%eax, %ebx
 	andl	$VG_TT_FAST_MASK, %ebx	
-	# ebx = tt_fast index
+	/* ebx = tt_fast index */
 	movl	VG_(tt_fast)(,%ebx,4), %ebx	
-	# ebx points at a tt entry
-	# now compare target with the tte.orig_addr field (+0)
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
 	cmpl	%eax, (%ebx)
 	jnz	fast_lookup_failed
-
-	# Found a match.  Set the tte.mru_epoch field (+8)
-	# and call the tte.trans_addr field (+4)
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
 	movl	VG_(current_epoch), %ecx
 	movl	%ecx, 8(%ebx)
+#endif
 	call	*4(%ebx)
-	jmp	dispatch_main
+	cmpl	$VG_(baseBlock), %ebp
+	jz	dispatch_boring
+
+	jmp	dispatch_exceptional
+
 	
 fast_lookup_failed:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_FASTMISS, %eax
 	jmp	run_innerloop_exit
 
 counter_is_zero:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_COUNTERZERO, %eax
 	jmp	run_innerloop_exit
 	
@@ -155,21 +179,19 @@
    make it look cleaner. 
 */
 dispatch_exceptional:
-	# this is jumped to only, not fallen-through from above
-	cmpl	$VG_TRC_EBP_JMP_STKADJ, %ebp
-	jz	dispatch_stkadj
+	/* this is jumped to only, not fallen-through from above */
 	cmpl	$VG_TRC_EBP_JMP_SYSCALL, %ebp
 	jz	dispatch_syscall
 	cmpl	$VG_TRC_EBP_JMP_CLIENTREQ, %ebp
 	jz	dispatch_clientreq
 
-	# ebp has an invalid value ... crap out.
+	/* ebp has an invalid value ... crap out. */
 	pushl	$panic_msg_ebp
 	call	VG_(panic)
-	#	(never returns)
+	/* (never returns) */
 
 dispatch_syscall:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
@@ -177,29 +199,13 @@
 	jmp	run_innerloop_exit
 	
 dispatch_clientreq:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
 	movl	$VG_TRC_EBP_JMP_CLIENTREQ, %eax
 	jmp	run_innerloop_exit
 
-dispatch_stkadj:
-	# save %eax in %EIP
-	movl	$VG_(baseBlock), %ebp
-	movl	VGOFF_(m_eip), %esi
-	movl	%eax, (%ebp, %esi, 4)
-
-	# see if we need to mess with stack blocks
-	pushl	%eax
-	call	VG_(delete_client_stack_blocks_following_ESP_change)
-	popl	%eax
-	movl	$VG_(baseBlock), %ebp
-		
-	# ok, its not interesting.  Handle the normal way.
-	jmp	dispatch_boring
-
-
 .data
 panic_msg_ebp:
 .ascii	"vg_dispatch: %ebp has invalid value!"
@@ -207,6 +213,6 @@
 .text	
 
 
-##--------------------------------------------------------------------##
-##--- end                                            vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_dummy_profile.c b/coregrind/vg_dummy_profile.c
new file mode 100644
index 0000000..2f869c9
--- /dev/null
+++ b/coregrind/vg_dummy_profile.c
@@ -0,0 +1,67 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Dummy profiling machinery -- overridden by skins when they   ---*/
+/*--- want profiling.                                              ---*/
+/*---                                           vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+
+void VGP_(register_profile_event) ( Int n, Char* name )
+{
+}
+
+void VGP_(init_profiling) ( void )
+{
+   VG_(printf)(
+      "\nProfiling error:\n"
+      "  The --profile=yes option was specified, but the skin\n"
+      "  wasn't built for profiling.  #include \"vg_profile.c\"\n"
+      "  into the skin and rebuild to allow profiling.\n\n");
+   VG_(exit)(1);
+}
+
+void VGP_(done_profiling) ( void )
+{
+   VG_(panic)("done_profiling");
+}
+
+void VGP_(pushcc) ( UInt cc )
+{
+   VG_(panic)("pushcc");
+}
+
+void VGP_(popcc) ( UInt cc )
+{
+   VG_(panic)("popcc");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                       vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_errcontext.c b/coregrind/vg_errcontext.c
index 46838b6..f38ade6 100644
--- a/coregrind/vg_errcontext.c
+++ b/coregrind/vg_errcontext.c
@@ -25,147 +25,22 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /*------------------------------------------------------------*/
-/*--- Defns                                                ---*/
+/*--- Globals                                              ---*/
 /*------------------------------------------------------------*/
 
-/* Suppression is a type describing an error which we want to
-   suppress, ie, not show the user, usually because it is caused by a
-   problem in a library which we can't fix, replace or work around.
-   Suppressions are read from a file at startup time, specified by
-   vg_clo_suppressions, and placed in the vg_suppressions list.  This
-   gives flexibility so that new suppressions can be added to the file
-   as and when needed. 
-*/
-typedef 
-   enum { 
-      /* Bad syscall params */
-      Param, 
-      /* Use of invalid values of given size */
-      Value0, Value1, Value2, Value4, Value8, 
-      /* Invalid read/write attempt at given size */
-      Addr1, Addr2, Addr4, Addr8,
-      /* Invalid or mismatching free */
-      FreeS,
-      /* Pthreading error */
-      PThread
-   } 
-   SuppressionKind;
-
-
-/* For each caller specified for a suppression, record the nature of
-   the caller name. */
-typedef
-   enum { 
-      /* Name is of an shared object file. */
-      ObjName,
-      /* Name is of a function. */
-      FunName 
-   }
-   SuppressionLocTy;
-
-
-/* A complete suppression record. */
-typedef
-   struct _Suppression {
-      struct _Suppression* next;
-      /* The number of times this error has been suppressed. */
-      Int count;
-      /* The name by which the suppression is referred to. */
-      Char* sname;
-      /* What kind of suppression. */
-      SuppressionKind skind;
-      /* Name of syscall param if skind==Param */
-      Char* param;
-      /* Name of fn where err occurs, and immediate caller (mandatory). */
-      SuppressionLocTy caller0_ty;
-      Char*            caller0;
-      SuppressionLocTy caller1_ty;
-      Char*            caller1;
-      /* Optional extra callers. */
-      SuppressionLocTy caller2_ty;
-      Char*            caller2;
-      SuppressionLocTy caller3_ty;
-      Char*            caller3;
-   } 
-   Suppression;
-
-
-/* ErrContext is a type for recording just enough info to generate an
-   error report for an illegal memory access.  The idea is that
-   (typically) the same few points in the program generate thousands
-   of illegal accesses, and we don't want to spew out a fresh error
-   message for each one.  Instead, we use these structures to common
-   up duplicates.  
-*/
-
-/* What kind of error it is. */
-typedef 
-   enum { ValueErr, AddrErr, 
-          ParamErr, UserErr, /* behaves like an anonymous ParamErr */
-          FreeErr, FreeMismatchErr,
-          PThreadErr /* pthread API error */
-   }
-   ErrKind;
-
-/* What kind of memory access is involved in the error? */
-typedef
-   enum { ReadAxs, WriteAxs, ExecAxs }
-   AxsKind;
-
-/* Top-level struct for recording errors. */
-typedef
-   struct _ErrContext {
-      /* ALL */
-      struct _ErrContext* next;
-      /* ALL */
-      /* NULL if unsuppressed; or ptr to suppression record. */
-      Suppression* supp;
-      /* ALL */
-      Int count;
-      /* ALL */
-      ErrKind ekind;
-      /* ALL */
-      ExeContext* where;
-      /* Addr */
-      AxsKind axskind;
-      /* Addr, Value */
-      Int size;
-      /* Addr, Free, Param, User */
-      Addr addr;
-      /* Addr, Free, Param, User */
-      AddrInfo addrinfo;
-      /* Param; hijacked for PThread as a description */
-      Char* syscall_param;
-      /* Param, User */
-      Bool isWriteableLack;
-      /* ALL */
-      ThreadId tid;
-      /* ALL */
-      /* These record %EIP, %ESP and %EBP at the error point.  They
-         are only used to make GDB-attaching convenient; there is no
-         other purpose; specifically they are not used to do
-         comparisons between errors. */
-      UInt m_eip;
-      UInt m_esp;
-      UInt m_ebp;
-   } 
-   ErrContext;
-
 /* The list of error contexts found, both suppressed and unsuppressed.
    Initially empty, and grows as errors are detected. */
-static ErrContext* vg_err_contexts = NULL;
+static CoreError* vg_errors = NULL;
 
 /* The list of suppression directives, as read from the specified
    suppressions file. */
-static Suppression* vg_suppressions = NULL;
+static CoreSupp* vg_suppressions = NULL;
 
 /* Running count of unsuppressed errors detected. */
 static UInt vg_n_errs_found = 0;
@@ -173,265 +48,76 @@
 /* Running count of suppressed errors detected. */
 static UInt vg_n_errs_suppressed = 0;
 
-/* Used to disable further error reporting once some huge number of
-   errors have already been logged. */
-static Bool vg_ignore_errors = False;
-
 /* forwards ... */
-static Suppression* is_suppressible_error ( ErrContext* ec );
+static CoreSupp* is_suppressible_error ( CoreError* err );
 
 
 /*------------------------------------------------------------*/
 /*--- Helper fns                                           ---*/
 /*------------------------------------------------------------*/
 
-
-static void clear_AddrInfo ( AddrInfo* ai )
-{
-   ai->akind      = Unknown;
-   ai->blksize    = 0;
-   ai->rwoffset   = 0;
-   ai->lastchange = NULL;
-   ai->stack_tid  = VG_INVALID_THREADID;
-   ai->maybe_gcc  = False;
-}
-
-static void clear_ErrContext ( ErrContext* ec )
-{
-   ec->next    = NULL;
-   ec->supp    = NULL;
-   ec->count   = 0;
-   ec->ekind   = ValueErr;
-   ec->where   = NULL;
-   ec->axskind = ReadAxs;
-   ec->size    = 0;
-   ec->addr    = 0;
-   clear_AddrInfo ( &ec->addrinfo );
-   ec->syscall_param   = NULL;
-   ec->isWriteableLack = False;
-   ec->m_eip   = 0xDEADB00F;
-   ec->m_esp   = 0xDEADBE0F;
-   ec->m_ebp   = 0xDEADB0EF;
-   ec->tid     = VG_INVALID_THREADID;
-}
-
-
-static __inline__
-Bool vg_eq_ExeContext ( Bool top_2_only,
-                        ExeContext* e1, ExeContext* e2 )
-{
-   /* Note that frames after the 4th are always ignored. */
-   if (top_2_only) {
-      return VG_(eq_ExeContext_top2(e1, e2));
-   } else {
-      return VG_(eq_ExeContext_top4(e1, e2));
-   }
-}
-
-
-static Bool eq_AddrInfo ( Bool cheap_addr_cmp,
-                          AddrInfo* ai1, AddrInfo* ai2 )
-{
-   if (ai1->akind != Undescribed 
-       && ai2->akind != Undescribed
-       && ai1->akind != ai2->akind) 
-      return False;
-   if (ai1->akind == Freed || ai1->akind == Mallocd) {
-      if (ai1->blksize != ai2->blksize)
-         return False;
-      if (!vg_eq_ExeContext(cheap_addr_cmp, 
-                            ai1->lastchange, ai2->lastchange))
-         return False;
-   }
-   return True;
-}
-
 /* Compare error contexts, to detect duplicates.  Note that if they
    are otherwise the same, the faulting addrs and associated rwoffsets
    are allowed to be different.  */
-
-static Bool eq_ErrContext ( Bool cheap_addr_cmp,
-                            ErrContext* e1, ErrContext* e2 )
+static Bool eq_CoreError ( VgRes res, CoreError* e1, CoreError* e2 )
 {
-   if (e1->ekind != e2->ekind) 
+   if (e1->skin_err.ekind != e2->skin_err.ekind) 
       return False;
-   if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where))
+   if (!VG_(eq_ExeContext)(res, e1->where, e2->where))
       return False;
 
-   switch (e1->ekind) {
+   switch (e1->skin_err.ekind) {
       case PThreadErr:
-         if (e1->syscall_param == e2->syscall_param) 
+         vg_assert(VG_(needs).core_errors);
+         if (e1->skin_err.string == e2->skin_err.string) 
             return True;
-         if (0 == VG_(strcmp)(e1->syscall_param, e2->syscall_param))
+         if (0 == VG_(strcmp)(e1->skin_err.string, e2->skin_err.string))
             return True;
          return False;
-      case UserErr:
-      case ParamErr:
-         if (e1->isWriteableLack != e2->isWriteableLack) return False;
-         if (e1->ekind == ParamErr 
-             && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param))
-            return False;
-         return True;
-      case FreeErr:
-      case FreeMismatchErr:
-         if (e1->addr != e2->addr) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case AddrErr:
-         if (e1->axskind != e2->axskind) return False;
-         if (e1->size != e2->size) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case ValueErr:
-         if (e1->size != e2->size) return False;
-         return True;
       default: 
-         VG_(panic)("eq_ErrContext");
+         if (VG_(needs).skin_errors)
+            return SK_(eq_SkinError)(res, &e1->skin_err, &e2->skin_err);
+         else {
+            VG_(printf)("\nUnhandled error type: %u. VG_(needs).skin_errors\n"
+                        "probably needs to be set.\n",
+                        e1->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-static void pp_AddrInfo ( Addr a, AddrInfo* ai )
+static void pp_CoreError ( CoreError* err, Bool printCount )
 {
-   switch (ai->akind) {
-      case Stack: 
-         VG_(message)(Vg_UserMsg, 
-                      "   Address 0x%x is on thread %d's stack", 
-                      a, ai->stack_tid);
-         break;
-      case Unknown:
-         if (ai->maybe_gcc) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
-               a);
-            VG_(message)(Vg_UserMsg, 
-               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
-         }
-         break;
-      case Freed: case Mallocd: case UserG: case UserS: {
-         UInt delta;
-         UChar* relative;
-         if (ai->rwoffset < 0) {
-            delta    = (UInt)(- ai->rwoffset);
-            relative = "before";
-         } else if (ai->rwoffset >= ai->blksize) {
-            delta    = ai->rwoffset - ai->blksize;
-            relative = "after";
-         } else {
-            delta    = ai->rwoffset;
-            relative = "inside";
-         }
-         if (ai->akind == UserS) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a %d-byte stack red-zone created",
-               a, delta, relative, 
-               ai->blksize );
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a block of size %d %s",
-               a, delta, relative, 
-               ai->blksize,
-               ai->akind==Mallocd ? "alloc'd" 
-                  : ai->akind==Freed ? "free'd" 
-                                     : "client-defined");
-         }
-         VG_(pp_ExeContext)(ai->lastchange);
-         break;
-      }
-      default:
-         VG_(panic)("pp_AddrInfo");
+   /* Closure for printing where the error occurred.  Abstracts details
+      about the `where' field away from the skin. */
+   void pp_ExeContextClosure(void)
+   {
+      VG_(pp_ExeContext) ( err->where );
    }
-}
-
-static void pp_ErrContext ( ErrContext* ec, Bool printCount )
-{
+   
    if (printCount)
-      VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count );
-   if (ec->tid > 1)
-      VG_(message)(Vg_UserMsg, "Thread %d:", ec->tid );
-   switch (ec->ekind) {
-      case ValueErr:
-         if (ec->size == 0) {
-             VG_(message)(
-                Vg_UserMsg,
-                "Conditional jump or move depends on uninitialised value(s)");
-         } else {
-             VG_(message)(Vg_UserMsg,
-                          "Use of uninitialised value of size %d",
-                          ec->size);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         break;
-      case AddrErr:
-         switch (ec->axskind) {
-            case ReadAxs:
-               VG_(message)(Vg_UserMsg, "Invalid read of size %d", 
-                                        ec->size ); 
-               break;
-            case WriteAxs:
-               VG_(message)(Vg_UserMsg, "Invalid write of size %d", 
-                                        ec->size ); 
-               break;
-            case ExecAxs:
-               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
-                                        "stated on the next line");
-               break;
-            default: 
-               VG_(panic)("pp_ErrContext(axskind)");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case FreeErr:
-         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
-         /* fall through */
-      case FreeMismatchErr:
-         if (ec->ekind == FreeMismatchErr)
-            VG_(message)(Vg_UserMsg, 
-                         "Mismatched free() / delete / delete []");
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case ParamErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Syscall param %s contains unaddressable byte(s)",
-                ec->syscall_param );
-         } else {
-            VG_(message)(Vg_UserMsg, 
-                "Syscall param %s contains uninitialised or "
-                "unaddressable byte(s)",
-            ec->syscall_param);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case UserErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Unaddressable byte(s) found during client check request");
-         } else {
-            VG_(message)(Vg_UserMsg, 
-               "Uninitialised or "
-               "unaddressable byte(s) found during client check request");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
+      VG_(message)(Vg_UserMsg, "Observed %d times:", err->count );
+   if (err->tid > 1)
+      VG_(message)(Vg_UserMsg, "Thread %d:", err->tid );
+
+   switch (err->skin_err.ekind) {
       case PThreadErr:
-         VG_(message)(Vg_UserMsg, "%s", ec->syscall_param );
-         VG_(pp_ExeContext)(ec->where);
+         vg_assert(VG_(needs).core_errors);
+         VG_(message)(Vg_UserMsg, "%s", err->skin_err.string );
+         VG_(pp_ExeContext)(err->where);
          break;
       default: 
-         VG_(panic)("pp_ErrContext");
+         if (VG_(needs).skin_errors)
+            SK_(pp_SkinError)( &err->skin_err, &pp_ExeContextClosure );
+         else {
+            VG_(printf)("\nUnhandled error type: %u.  VG_(needs).skin_errors\n"
+                        "probably needs to be set?\n",
+                        err->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-
 /* Figure out if we want to attach for GDB for this error, possibly
    by asking the user. */
 static
@@ -476,21 +162,69 @@
 }
 
 
-/* Top-level entry point to the error management subsystem.  All
-   detected errors are notified here; this routine decides if/when the
-   user should see the error. */
-static void VG_(maybe_add_context) ( ErrContext* ec )
+/* I've gone all object-oriented... initialisation depends on where the
+   error comes from:
+
+   - If from generated code (tst == NULL), the %EIP/%EBP values that we
+     need in order to create proper error messages are picked up out of
+     VG_(baseBlock) rather than from the thread table (vg_threads in
+     vg_scheduler.c).
+
+   - If not from generated code but in response to requests passed back to
+     the scheduler (tst != NULL), we pick up %EIP/%EBP values from the
+     stored thread state, not from VG_(baseBlock).  
+*/
+static __inline__
+void construct_error ( CoreError* err, ThreadState* tst, 
+                       ErrorKind ekind, Addr a, Char* s, void* extra )
 {
-   ErrContext* p;
-   ErrContext* p_prev;
-   Bool        cheap_addr_cmp         = False;
+   /* CoreError parts */
+   err->next     = NULL;
+   err->supp     = NULL;
+   err->count    = 1;
+   if (NULL == tst) {
+      err->tid   = VG_(get_current_tid)();
+      err->where = 
+         VG_(get_ExeContext2)( VG_(baseBlock)[VGOFF_(m_eip)], 
+                               VG_(baseBlock)[VGOFF_(m_ebp)],
+                               VG_(baseBlock)[VGOFF_(m_esp)],
+                               VG_(threads)[err->tid].stack_highest_word);
+      err->m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
+      err->m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+      err->m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
+   } else {
+      err->where = VG_(get_ExeContext) ( tst );
+      err->tid   = tst->tid;
+      err->m_eip = tst->m_eip;
+      err->m_esp = tst->m_esp;
+      err->m_ebp = tst->m_ebp;
+   }
+
+   /* SkinError parts */
+   err->skin_err.ekind  = ekind;
+   err->skin_err.addr   = a;
+   err->skin_err.string = s;
+   err->skin_err.extra  = extra;
+
+   /* sanity... */
+   vg_assert(err->tid >= 0 && err->tid < VG_N_THREADS);
+}
+
+/* Top-level entry point to the error management subsystem.
+   All detected errors are notified here; this routine decides if/when the
+   user should see the error. */
+void VG_(maybe_record_error) ( ThreadState* tst, 
+                               ErrorKind ekind, Addr a, Char* s, void* extra )
+{
+   CoreError   err;
+   CoreError*  p;
+   CoreError*  p_prev;
+   VgRes       exe_res                = Vg_MedRes;
    static Bool is_first_shown_context = True;
    static Bool stopping_message       = False;
    static Bool slowdown_message       = False;
    static Int  vg_n_errs_shown        = 0;
 
-   vg_assert(ec->tid >= 0 && ec->tid < VG_N_THREADS);
-
    /* After M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN different errors have
       been found, or M_VG_COLLECT_NO_ERRORS_AFTER_FOUND total errors
       have been found, just refuse to collect any more.  This stops
@@ -520,12 +254,11 @@
          VG_(message)(Vg_UserMsg, 
             "Rerun with --error-limit=no to disable this cutoff.  Note");
          VG_(message)(Vg_UserMsg, 
-            "that your program may now segfault without prior warning from");
+            "that errors may occur in your program without prior warning from");
          VG_(message)(Vg_UserMsg, 
             "Valgrind, because errors are no longer being displayed.");
          VG_(message)(Vg_UserMsg, "");
          stopping_message = True;
-         vg_ignore_errors = True;
       }
       return;
    }
@@ -534,7 +267,7 @@
       been found, be much more conservative about collecting new
       ones. */
    if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) {
-      cheap_addr_cmp = True;
+      exe_res = Vg_LowRes;
       if (!slowdown_message) {
          VG_(message)(Vg_UserMsg, "");
          VG_(message)(Vg_UserMsg, 
@@ -546,12 +279,14 @@
       }
    }
 
+   /* Build ourselves the error */
+   construct_error ( &err, tst, ekind, a, s, extra );
 
    /* First, see if we've got an error record matching this one. */
-   p      = vg_err_contexts;
+   p      = vg_errors;
    p_prev = NULL;
    while (p != NULL) {
-      if (eq_ErrContext(cheap_addr_cmp, p, ec)) {
+      if (eq_CoreError(exe_res, p, &err)) {
          /* Found it. */
          p->count++;
 	 if (p->supp != NULL) {
@@ -567,8 +302,8 @@
          if (p_prev != NULL) {
             vg_assert(p_prev->next == p);
             p_prev->next    = p->next;
-            p->next         = vg_err_contexts;
-            vg_err_contexts = p;
+            p->next         = vg_errors;
+            vg_errors = p;
 	 }
          return;
       }
@@ -578,27 +313,37 @@
 
    /* Didn't see it.  Copy and add. */
 
-   /* OK, we're really going to collect it.  First, describe any addr
-      info in the error. */
-   if (ec->addrinfo.akind == Undescribed)
-      VG_(describe_addr) ( ec->addr, &ec->addrinfo );
+   /* OK, we're really going to collect it.  First make a copy,
+      because the error context is on the stack and will disappear shortly.
+      We can duplicate the main part ourselves, but use
+      SK_(dup_extra_and_update) to duplicate the 'extra' part (unless it's
+      NULL).
+     
+      SK_(dup_extra_and_update) can also update the SkinError.  This is
+      for when there are more details to fill in which take time to work out
+      but don't affect our earlier decision to include the error -- by
+      postponing those details until now, we avoid the extra work in the
+      case where we ignore the error.
+    */
+   p = VG_(arena_malloc)(VG_AR_ERRORS, sizeof(CoreError));
+   *p = err;
+   if (NULL != err.skin_err.extra)
+      SK_(dup_extra_and_update)(&p->skin_err);
 
-   p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext));
-   *p = *ec;
-   p->next = vg_err_contexts;
-   p->supp = is_suppressible_error(ec);
-   vg_err_contexts = p;
+   p->next = vg_errors;
+   p->supp = is_suppressible_error(&err);
+   vg_errors = p;
    if (p->supp == NULL) {
       vg_n_errs_found++;
       if (!is_first_shown_context)
          VG_(message)(Vg_UserMsg, "");
-      pp_ErrContext(p, False);      
+      pp_CoreError(p, False);      
       is_first_shown_context = False;
       vg_n_errs_shown++;
       /* Perhaps we want a GDB attach at this point? */
       if (vg_is_GDB_attach_requested()) {
          VG_(swizzle_esp_then_start_GDB)(
-            ec->m_eip, ec->m_esp, ec->m_ebp);
+            err.m_eip, err.m_esp, err.m_ebp);
       }
    } else {
       vg_n_errs_suppressed++;
@@ -607,202 +352,34 @@
 }
 
 
-
-
 /*------------------------------------------------------------*/
 /*--- Exported fns                                         ---*/
 /*------------------------------------------------------------*/
 
-/* These two are called from generated code, so that the %EIP/%EBP
-   values that we need in order to create proper error messages are
-   picked up out of VG_(baseBlock) rather than from the thread table
-   (vg_threads in vg_scheduler.c). */
+/* These are called not from generated code but from the scheduler */
 
-void VG_(record_value_error) ( Int size )
+void VG_(record_pthread_error) ( ThreadId tid, Char* msg )
 {
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count = 1;
-   ec.next  = NULL;
-   ec.where = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                          VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind = ValueErr;
-   ec.size  = size;
-   ec.tid   = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   VG_(maybe_add_context) ( &ec );
+   if (! VG_(needs).core_errors) return;
+   VG_(maybe_record_error)( &VG_(threads)[tid], PThreadErr, /*addr*/0, msg, 
+                            /*extra*/NULL );
 }
 
-void VG_(record_address_error) ( Addr a, Int size, Bool isWrite )
-{
-   ErrContext ec;
-   Bool       just_below_esp;
-   if (vg_ignore_errors) return;
-
-   just_below_esp 
-      = VG_(is_just_below_ESP)( VG_(baseBlock)[VGOFF_(m_esp)], a );
-
-   /* If this is caused by an access immediately below %ESP, and the
-      user asks nicely, we just ignore it. */
-   if (VG_(clo_workaround_gcc296_bugs) && just_below_esp)
-      return;
-
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                            VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind   = AddrErr;
-   ec.axskind = isWrite ? WriteAxs : ReadAxs;
-   ec.size    = size;
-   ec.addr    = a;
-   ec.tid     = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   ec.addrinfo.akind     = Undescribed;
-   ec.addrinfo.maybe_gcc = just_below_esp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
-/* These five are called not from generated code but in response to
-   requests passed back to the scheduler.  So we pick up %EIP/%EBP
-   values from the stored thread state, not from VG_(baseBlock).  */
-
-void VG_(record_free_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_freemismatch_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeMismatchErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_jump_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = AddrErr;
-   ec.axskind = ExecAxs;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_param_err) ( ThreadState* tst, Addr a, Bool isWriteLack, 
-                             Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = ParamErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.syscall_param = msg;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_user_err) ( ThreadState* tst, Addr a, Bool isWriteLack )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = UserErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_pthread_err) ( ThreadId tid, Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   if (!VG_(clo_instrument)) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(threads)[tid].m_eip, 
-                                            VG_(threads)[tid].m_ebp );
-   ec.ekind   = PThreadErr;
-   ec.tid     = tid;
-   ec.syscall_param = msg;
-   ec.m_eip   = VG_(threads)[tid].m_eip;
-   ec.m_esp   = VG_(threads)[tid].m_esp;
-   ec.m_ebp   = VG_(threads)[tid].m_ebp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
 /*------------------------------*/
 
 void VG_(show_all_errors) ( void )
 {
-   Int         i, n_min;
-   Int         n_err_contexts, n_supp_contexts;
-   ErrContext  *p, *p_min;
-   Suppression *su;
-   Bool        any_supp;
+   Int        i, n_min;
+   Int        n_err_contexts, n_supp_contexts;
+   CoreError *p, *p_min;
+   CoreSupp   *su;
+   Bool       any_supp;
 
    if (VG_(clo_verbosity) == 0)
       return;
 
    n_err_contexts = 0;
-   for (p = vg_err_contexts; p != NULL; p = p->next) {
+   for (p = vg_errors; p != NULL; p = p->next) {
       if (p->supp == NULL)
          n_err_contexts++;
    }
@@ -826,20 +403,20 @@
    for (i = 0; i < n_err_contexts; i++) {
       n_min = (1 << 30) - 1;
       p_min = NULL;
-      for (p = vg_err_contexts; p != NULL; p = p->next) {
+      for (p = vg_errors; p != NULL; p = p->next) {
          if (p->supp != NULL) continue;
          if (p->count < n_min) {
             n_min = p->count;
             p_min = p;
          }
       }
-      if (p_min == NULL) VG_(panic)("pp_AllErrContexts");
+      if (p_min == NULL) VG_(panic)("show_all_errors()");
 
       VG_(message)(Vg_UserMsg, "");
       VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:",
                    p_min->count,
                    i+1, n_err_contexts);
-      pp_ErrContext( p_min, False );
+      pp_CoreError( p_min, False );
 
       if ((i+1 == VG_(clo_dump_error))) {
 	VG_(translate) ( 0 /* dummy ThreadId; irrelevant due to below NULLs */,
@@ -855,8 +432,7 @@
    for (su = vg_suppressions; su != NULL; su = su->next) {
       if (su->count > 0) {
          any_supp = True;
-         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, 
-                                   su->sname);
+         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, su->sname);
       }
    }
 
@@ -883,7 +459,7 @@
 
 #define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t'))
 
-static Bool getLine ( Int fd, Char* buf, Int nBuf )
+Bool VG_(getLine) ( Int fd, Char* buf, Int nBuf )
 {
    Char ch;
    Int  n, i;
@@ -924,7 +500,7 @@
    (fun: or obj:) part.
    Returns False if failed.
 */
-static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty )
+static Bool setLocationTy ( Char** p_caller, SuppLocTy* p_ty )
 {
    if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) {
       (*p_caller) += 4;
@@ -948,107 +524,95 @@
 #define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                       && VG_(strcmp)((s1),(s2))==0)
 
-static Char* copyStr ( Char* str )
-{
-   Int   n, i;
-   Char* str2;
-   n    = VG_(strlen)(str);
-   str2 = VG_(malloc)(VG_AR_PRIVATE, n+1);
-   vg_assert(n > 0);
-   for (i = 0; i < n+1; i++) str2[i] = str[i];
-   return str2;
-}
-
 static void load_one_suppressions_file ( Char* filename )
 {
 #  define N_BUF 200
-   Int  fd;
+   Int  fd, i;
    Bool eof;
+   Bool is_unrecognised_suppressions = False;
    Char buf[N_BUF+1];
-   fd = VG_(open_read)( filename );
+   fd = VG_(open)( filename, VKI_O_RDONLY, 0 );
    if (fd == -1) {
-      VG_(message)(Vg_UserMsg, 
-                   "FATAL: can't open suppressions file `%s'", 
+      VG_(message)(Vg_UserMsg, "FATAL: can't open suppressions file `%s'", 
                    filename );
       VG_(exit)(1);
    }
 
    while (True) {
-      Suppression* supp;
-      supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression));
+      /* Assign and initialise the two suppression halves (core and skin) */
+      CoreSupp* supp;
+      supp            = VG_(arena_malloc)(VG_AR_CORE, sizeof(CoreSupp));
       supp->count = 0;
-      supp->param = supp->caller0 = supp->caller1 
-                  = supp->caller2 = supp->caller3 = NULL;
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) supp->caller[i] = NULL;
+      supp->skin_supp.string = supp->skin_supp.extra = NULL;
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof) break;
 
       if (!STREQ(buf, "{")) goto syntax_error;
       
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof || STREQ(buf, "}")) goto syntax_error;
-      supp->sname = copyStr(buf);
+      supp->sname = VG_(arena_strdup)(VG_AR_CORE, buf);
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
+
       if (eof) goto syntax_error;
-      else if (STREQ(buf, "Param"))  supp->skind = Param;
-      else if (STREQ(buf, "Value0")) supp->skind = Value0; /* backwards compat */
-      else if (STREQ(buf, "Cond"))   supp->skind = Value0;
-      else if (STREQ(buf, "Value1")) supp->skind = Value1;
-      else if (STREQ(buf, "Value2")) supp->skind = Value2;
-      else if (STREQ(buf, "Value4")) supp->skind = Value4;
-      else if (STREQ(buf, "Value8")) supp->skind = Value8;
-      else if (STREQ(buf, "Addr1"))  supp->skind = Addr1;
-      else if (STREQ(buf, "Addr2"))  supp->skind = Addr2;
-      else if (STREQ(buf, "Addr4"))  supp->skind = Addr4;
-      else if (STREQ(buf, "Addr8"))  supp->skind = Addr8;
-      else if (STREQ(buf, "Free"))   supp->skind = FreeS;
-      else if (STREQ(buf, "PThread")) supp->skind = PThread;
-      else goto syntax_error;
 
-      if (supp->skind == Param) {
-         eof = getLine ( fd, buf, N_BUF );
-         if (eof) goto syntax_error;
-         supp->param = copyStr(buf);
+      /* Is it a core suppression? */
+      else if (VG_(needs).core_errors && STREQ(buf, "PThread")) 
+         supp->skin_supp.skind = PThreadSupp;
+
+      /* Is it a skin suppression? */
+      else if (VG_(needs).skin_errors && 
+               SK_(recognised_suppression)(buf, &(supp->skin_supp.skind))) {
+         /* do nothing, function fills in supp->skin_supp.skind */
+      }
+      //else goto syntax_error;
+      else {
+         /* SSS: if we don't recognise the suppression name, ignore entire
+          * entry.  Not sure if this is a good long-term approach -- makes
+          * it impossible to spot incorrect suppression names?  (apart
+          * from the warning given) */
+         if (! is_unrecognised_suppressions) {
+            is_unrecognised_suppressions = True;
+            VG_(start_msg)(Vg_DebugMsg);
+            VG_(add_to_msg)("Ignoring unrecognised suppressions: ");
+            VG_(add_to_msg)("'%s'", buf);
+         } else {
+            VG_(add_to_msg)(", '%s'", buf);
+         }
+         while (True) {
+            eof = VG_(getLine) ( fd, buf, N_BUF );
+            if (eof) goto syntax_error;
+            if (STREQ(buf, "}"))
+               break;
+         }
+         continue;
       }
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      supp->caller0 = copyStr(buf);
-      if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty)))
+      if (VG_(needs).skin_errors && 
+          !SK_(read_extra_suppression_info)(fd, buf, N_BUF, &supp->skin_supp)) 
          goto syntax_error;
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      if (!STREQ(buf, "}")) {
-         supp->caller1 = copyStr(buf);
-         if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty)))
-            goto syntax_error;
-      
-         eof = getLine ( fd, buf, N_BUF );
+      /* "i > 0" ensures at least one caller read. */
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) {
+         eof = VG_(getLine) ( fd, buf, N_BUF );
          if (eof) goto syntax_error;
-         if (!STREQ(buf, "}")) {
-            supp->caller2 = copyStr(buf);
-            if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty)))
-               goto syntax_error;
-
-            eof = getLine ( fd, buf, N_BUF );
-            if (eof) goto syntax_error;
-            if (!STREQ(buf, "}")) {
-               supp->caller3 = copyStr(buf);
-              if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty)))
-                 goto syntax_error;
-
-               eof = getLine ( fd, buf, N_BUF );
-               if (eof || !STREQ(buf, "}")) goto syntax_error;
-	    }
-         }
+         if (i > 0 && STREQ(buf, "}")) 
+            break;
+         supp->caller[i] = VG_(arena_strdup)(VG_AR_CORE, buf);
+         if (!setLocationTy(&(supp->caller[i]), &(supp->caller_ty[i])))
+            goto syntax_error;
       }
 
       supp->next = vg_suppressions;
       vg_suppressions = supp;
    }
-
+   if (is_unrecognised_suppressions) {
+      /* Print out warning about any ignored suppressions */
+      //VG_(end_msg)();
+   }
    VG_(close)(fd);
    return;
 
@@ -1083,148 +647,102 @@
    }
 }
 
+/* Return the name of an erring fn in a way which is useful
+   for comparing against the contents of a suppressions file. 
+   Doesn't demangle the fn name, because we want to refer to 
+   mangled names in the suppressions file.
+*/    
+static
+void get_objname_fnname ( Addr a,
+                          Char* obj_buf, Int n_obj_buf,
+                          Char* fun_buf, Int n_fun_buf )
+{     
+   (void)VG_(get_objname)          ( a, obj_buf, n_obj_buf );
+   (void)VG_(get_fnname_nodemangle)( a, fun_buf, n_fun_buf );
+}     
+
+static __inline__
+Bool supp_matches_error(CoreSupp* su, CoreError* err)
+{
+   switch (su->skin_supp.skind) {
+      case PThreadSupp:
+         return (err->skin_err.ekind == PThreadErr);
+      default:
+         if (VG_(needs).skin_errors) {
+            return (SK_(error_matches_suppression)(&err->skin_err, 
+                                                    &su->skin_supp));
+         } else {
+            VG_(printf)(
+               "\nUnhandled suppression type: %u.  VG_(needs).skin_errors\n"
+               "probably needs to be set.\n",
+               err->skin_err.ekind);
+            VG_(skin_error)("unhandled suppression type");
+         }
+   }
+}
+
+static __inline__
+Bool supp_matches_callers(CoreSupp* su, Char caller_obj[][M_VG_ERRTXT], 
+                                        Char caller_fun[][M_VG_ERRTXT])
+{
+   Int i;
+
+   for (i = 0; su->caller[i] != NULL; i++) {
+      switch (su->caller_ty[i]) {
+         case ObjName: if (VG_(stringMatch)(su->caller[i],
+                                            caller_obj[i])) break;
+                       return False;
+         case FunName: if (VG_(stringMatch)(su->caller[i], 
+                                            caller_fun[i])) break;
+                       return False;
+         default: VG_(panic)("is_suppressible_error");
+      }
+   }
+
+   /* If we reach here, it's a match */
+   return True;
+}
 
 /* Does an error context match a suppression?  ie is this a
-   suppressible error?  If so, return a pointer to the Suppression
+   suppressible error?  If so, return a pointer to the CoreSupp
    record, otherwise NULL.
-   Tries to minimise the number of calls to what_fn_is_this since they
-   are expensive.  
+   Tries to minimise the number of symbol searches since they are expensive.  
 */
-static Suppression* is_suppressible_error ( ErrContext* ec )
+static CoreSupp* is_suppressible_error ( CoreError* err )
 {
 #  define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                         && VG_(strcmp)((s1),(s2))==0)
+   Int i;
 
-   Char caller0_obj[M_VG_ERRTXT];
-   Char caller0_fun[M_VG_ERRTXT];
-   Char caller1_obj[M_VG_ERRTXT];
-   Char caller1_fun[M_VG_ERRTXT];
-   Char caller2_obj[M_VG_ERRTXT];
-   Char caller2_fun[M_VG_ERRTXT];
-   Char caller3_obj[M_VG_ERRTXT];
-   Char caller3_fun[M_VG_ERRTXT];
+   Char caller_obj[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
+   Char caller_fun[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
 
-   Suppression* su;
-   Int          su_size;
+   CoreSupp* su;
 
-   /* vg_what_fn_or_object_is_this returns:
-         <function_name>      or
-         <object_name>        or
-         ???
-      so the strings in the suppression file should match these.
+   /* get_objname_fnname() writes the function name and object name if
+      it finds them in the debug info.  so the strings in the suppression
+      file should match these.
    */
 
    /* Initialise these strs so they are always safe to compare, even
-      if what_fn_or_object_is_this doesn't write anything to them. */
-   caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0;
-   caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0;
+      if get_objname_fnname doesn't write anything to them. */
+   for (i = 0; i < VG_N_SUPP_CALLERS; i++)
+      caller_obj[i][0] = caller_fun[i][0] = 0;
 
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT,
-                            caller0_fun, M_VG_ERRTXT );
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT,
-                            caller1_fun, M_VG_ERRTXT );
-
-   if (VG_(clo_backtrace_size) > 2) {
-      VG_(what_obj_and_fun_is_this)
-         ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT,
-                               caller2_fun, M_VG_ERRTXT );
-
-      if (VG_(clo_backtrace_size) > 3) {
-         VG_(what_obj_and_fun_is_this)
-            ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT,
-                                  caller3_fun, M_VG_ERRTXT );
-      }
+   for (i = 0; i < VG_N_SUPP_CALLERS && i < VG_(clo_backtrace_size); i++) {
+      get_objname_fnname ( err->where->eips[i], 
+                           caller_obj[i], M_VG_ERRTXT,
+                           caller_fun[i], M_VG_ERRTXT );
    }
 
    /* See if the error context matches any suppression. */
    for (su = vg_suppressions; su != NULL; su = su->next) {
-      switch (su->skind) {
-         case FreeS:  case PThread:
-         case Param:  case Value0: su_size = 0; break;
-         case Value1: case Addr1:  su_size = 1; break;
-         case Value2: case Addr2:  su_size = 2; break;
-         case Value4: case Addr4:  su_size = 4; break;
-         case Value8: case Addr8:  su_size = 8; break;
-         default: VG_(panic)("errcontext_matches_suppression");
+      if (supp_matches_error(su, err) &&
+          supp_matches_callers(su, caller_obj, caller_fun)) {
+         return su;
       }
-      switch (su->skind) {
-         case Param:
-            if (ec->ekind != ParamErr) continue;
-            if (!STREQ(su->param, ec->syscall_param)) continue;
-            break;
-         case Value0: case Value1: case Value2: case Value4: case Value8:
-            if (ec->ekind != ValueErr) continue;
-            if (ec->size  != su_size)  continue;
-            break;
-         case Addr1: case Addr2: case Addr4: case Addr8:
-            if (ec->ekind != AddrErr) continue;
-            if (ec->size  != su_size) continue;
-            break;
-         case FreeS:
-            if (ec->ekind != FreeErr 
-                && ec->ekind != FreeMismatchErr) continue;
-            break;
-         case PThread:
-            if (ec->ekind != PThreadErr) continue;
-            break;
-      }
-
-      switch (su->caller0_ty) {
-         case ObjName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_obj)) continue;
-                       break;
-         case FunName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_fun)) continue;
-                       break;
-         default: goto baaaad;
-      }
-
-      if (su->caller1 != NULL) {
-         vg_assert(VG_(clo_backtrace_size) >= 2);
-         switch (su->caller1_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) {
-         switch (su->caller2_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) {
-         switch (su->caller3_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller3,
-                                                caller3_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller3, 
-                                                caller3_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      return su;
    }
-
-   return NULL;
-
-  baaaad:
-   VG_(panic)("is_suppressible_error");
+   return NULL;      /* no matches */
 
 #  undef STREQ
 }
diff --git a/coregrind/vg_execontext.c b/coregrind/vg_execontext.c
index 4da1b31..fe85fa0 100644
--- a/coregrind/vg_execontext.c
+++ b/coregrind/vg_execontext.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 
 /*------------------------------------------------------------*/
@@ -109,39 +108,40 @@
 
 
 /* Compare two ExeContexts, comparing all callers. */
-Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 )
+Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
 {
-   vg_ec_cmpAlls++;
-   /* Just do pointer comparison. */
-   if (e1 != e2) return False;
-   return True;
-}
+   if (e1 == NULL || e2 == NULL) 
+      return False;
+   switch (res) {
+   case Vg_LowRes:
+      /* Just compare the top two callers. */
+      vg_ec_cmp2s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
+      return True;
 
+   case Vg_MedRes:
+      /* Just compare the top four callers. */
+      vg_ec_cmp4s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
 
-/* Compare two ExeContexts, just comparing the top two callers. */
-Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp2s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
-   return True;
-}
+      if (VG_(clo_backtrace_size) < 3) return True;
+      if (e1->eips[2] != e2->eips[2]) return False;
 
+      if (VG_(clo_backtrace_size) < 4) return True;
+      if (e1->eips[3] != e2->eips[3]) return False;
+      return True;
 
-/* Compare two ExeContexts, just comparing the top four callers. */
-Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp4s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
+   case Vg_HighRes:
+      vg_ec_cmpAlls++;
+      /* Compare them all -- just do pointer comparison. */
+      if (e1 != e2) return False;
+      return True;
 
-   if (VG_(clo_backtrace_size) < 3) return True;
-   if (e1->eips[2] != e2->eips[2]) return False;
-
-   if (VG_(clo_backtrace_size) < 4) return True;
-   if (e1->eips[3] != e2->eips[3]) return False;
-
-   return True;
+   default:
+      VG_(panic)("VG_(eq_ExeContext): unrecognised VgRes");
+   }
 }
 
 
@@ -156,11 +156,12 @@
 
    In order to be thread-safe, we pass in the thread's %EIP and %EBP.
 */
-ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                  Addr eip, Addr ebp )
+ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                   Addr ebp_min, Addr ebp_max_orig )
 {
    Int         i;
    Addr        eips[VG_DEEPEST_BACKTRACE];
+   Addr        ebp_max;
    Bool        same;
    UInt        hash;
    ExeContext* new_ec;
@@ -173,29 +174,53 @@
 
    /* First snaffle %EIPs from the client's stack into eips[0
       .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail
-      goes cold. */
+      goes cold, which we guess to be when %ebp is not a reasonable
+      stack location.  We also assert that %ebp increases down the chain. */
 
-   for (i = 0; i < VG_(clo_backtrace_size); i++)
+   // Gives shorter stack trace for tests/badjump.c
+   // JRS 2002-aug-16: I don't think this is a big deal; looks ok for
+   // most "normal" backtraces.
+   // NJN 2002-sep-05: traces for pthreaded programs are particularly bad.
+
+   // JRS 2002-sep-17: hack, to round up ebp_max to the end of the
+   // current page, at least.  Dunno if it helps.
+   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
+   ebp_max = (ebp_max_orig + VKI_BYTES_PER_PAGE - 1) 
+                & ~(VKI_BYTES_PER_PAGE - 1);
+   ebp_max -= sizeof(Addr);
+
+   /* Assertion broken before main() is reached in pthreaded programs;  the
+    * offending stack traces only have one item.  --njn, 2002-aug-16 */
+   /* vg_assert(ebp_min <= ebp_max);*/
+
+   /* Checks the stack isn't riduculously big */
+   vg_assert(ebp_min + 4000000 > ebp_max);
+
+   //   VG_(printf)("%p -> %p\n", ebp_max_orig, ebp_max);
+   eips[0] = eip;
+   //   VG_(printf)("\nSNAP: %p .. %p, EBP=%p\n", ebp_min, ebp_max, ebp  );
+   //   VG_(printf)("   : %p\n", eips[0]);
+   /* Get whatever we safely can ... */
+   for (i = 1; i < VG_(clo_backtrace_size); i++) {
+      if (!(ebp_min <= ebp && ebp <= ebp_max)) {
+         //VG_(printf)("... out of range %p\n", ebp);
+         break; /* ebp gone baaaad */
+      }
+      // NJN 2002-sep-17: monotonicity doesn't work -- gives wrong traces...
+      //     if (ebp >= ((UInt*)ebp)[0]) {
+      //   VG_(printf)("nonmonotonic\n");
+      //    break; /* ebp gone nonmonotonic */
+      // }
+      eips[i] = ((UInt*)ebp)[1];  /* ret addr */
+      ebp     = ((UInt*)ebp)[0];  /* old ebp */
+      //VG_(printf)("     %p\n", eips[i]);
+   }
+
+   /* Put zeroes in the rest. */
+   for (;  i < VG_(clo_backtrace_size); i++) {
       eips[i] = 0;
-   
-#  define GET_CALLER(lval)                                        \
-   if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) {          \
-      lval = ((UInt*)ebp)[1];  /* ret addr */                     \
-      ebp  = ((UInt*)ebp)[0];  /* old ebp */                      \
-   } else {                                                       \
-      lval = ebp = 0;                                             \
    }
 
-   if (skip_top_frame) {
-      for (i = 0; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   } else {
-      eips[0] = eip;
-      for (i = 1; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   }
-#  undef GET_CALLER
-
    /* Now figure out if we've seen this one before.  First hash it so
       as to determine the list number. */
 
@@ -228,19 +253,16 @@
 
    if (list != NULL) {
       /* Yay!  We found it.  */
-      VGP_POPCC;
+      VGP_POPCC(VgpExeContext);
       return list;
    }
 
    /* Bummer.  We have to allocate a new context record. */
    vg_ec_totstored++;
 
-   new_ec 
-      = VG_(malloc)( 
-           VG_AR_EXECTXT, 
-           sizeof(struct _ExeContextRec *) 
-              + VG_(clo_backtrace_size) * sizeof(Addr) 
-        );
+   new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, 
+                               sizeof(struct _ExeContext *) 
+                               + VG_(clo_backtrace_size) * sizeof(Addr) );
 
    for (i = 0; i < VG_(clo_backtrace_size); i++)
       new_ec->eips[i] = eips[i];
@@ -248,10 +270,16 @@
    new_ec->next = vg_ec_list[hash];
    vg_ec_list[hash] = new_ec;
 
-   VGP_POPCC;
+   VGP_POPCC(VgpExeContext);
    return new_ec;
 }
 
+ExeContext* VG_(get_ExeContext) ( ThreadState *tst )
+{
+   return VG_(get_ExeContext2)( tst->m_eip, tst->m_ebp, tst->m_esp, 
+                                tst->stack_highest_word );
+}
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_execontext.c ---*/
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 26f1613..e99bfaa 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -35,10 +35,10 @@
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Instruction emission -- turning final uinstrs back   ---*/
@@ -52,7 +52,7 @@
    do this, calls and jmps to fixed addresses must specify the address
    by first loading it into a register, and jump to/call that
    register.  Fortunately, the only jump to a literal is the jump back
-   to vg_dispatch, and only %eax is live then, conveniently.  Ucode
+   to vg_dispatch, and only %eax is live then, conveniently.  UCode
    call insns may only have a register as target anyway, so there's no
    need to do anything fancy for them.
 
@@ -71,19 +71,104 @@
 static Int    emitted_code_used;
 static Int    emitted_code_size;
 
+/* Statistics about C functions called from generated code. */
+static UInt ccalls                 = 0;
+static UInt ccall_reg_saves        = 0;
+static UInt ccall_args             = 0;
+static UInt ccall_arg_setup_instrs = 0;
+static UInt ccall_stack_clears     = 0;
+static UInt ccall_retvals          = 0;
+static UInt ccall_retval_movs      = 0;
+
+/* Statistics about frequency of each UInstr */
+typedef
+   struct {
+      UInt counts;
+      UInt size;
+   } Histogram;
+
+/* Automatically zeroed because it's static. */
+static Histogram histogram[100];     
+
+void VG_(print_ccall_stats)(void)
+{
+   VG_(message)(Vg_DebugMsg,
+                "   ccalls: %u C calls, %u%% saves+restores avoided"
+                " (%d bytes)",
+                ccalls, 
+                100-(UInt)(ccall_reg_saves/(double)(ccalls*3)*100),
+                ((ccalls*3) - ccall_reg_saves)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %u args, avg 0.%d setup instrs each (%d bytes)", 
+                ccall_args, 
+               (UInt)(ccall_arg_setup_instrs/(double)ccall_args*100),
+               (ccall_args - ccall_arg_setup_instrs)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %d%% clear the stack (%d bytes)", 
+               (UInt)(ccall_stack_clears/(double)ccalls*100),
+               (ccalls - ccall_stack_clears)*3);
+   VG_(message)(Vg_DebugMsg,
+                "           %u retvals, %u%% of reg-reg movs avoided (%d bytes)",
+                ccall_retvals,
+                ( ccall_retvals == 0 
+                ? 100
+                : 100-(UInt)(ccall_retval_movs / 
+                             (double)ccall_retvals*100)),
+                (ccall_retvals-ccall_retval_movs)*2);
+}
+
+void VG_(print_UInstr_histogram)(void)
+{
+   Int i, j;
+   UInt total_counts = 0;
+   UInt total_size   = 0;
+   
+   for (i = 0; i < 100; i++) {
+      total_counts += histogram[i].counts;
+      total_size   += histogram[i].size;
+   }
+
+   VG_(printf)("-- UInstr frequencies -----------\n");
+   for (i = 0; i < 100; i++) {
+      if (0 != histogram[i].counts) {
+
+         UInt count_pc = 
+            (UInt)(histogram[i].counts/(double)total_counts*100 + 0.5);
+         UInt size_pc  = 
+            (UInt)(histogram[i].size  /(double)total_size  *100 + 0.5);
+         UInt avg_size =
+            (UInt)(histogram[i].size / (double)histogram[i].counts + 0.5);
+
+         VG_(printf)("%-7s:%8u (%2u%%), avg %2dB (%2u%%) |", 
+                     VG_(nameUOpcode)(True, i), 
+                     histogram[i].counts, count_pc, 
+                     avg_size, size_pc);
+
+         for (j = 0; j < size_pc; j++) VG_(printf)("O");
+         VG_(printf)("\n");
+
+      } else {
+         vg_assert(0 == histogram[i].size);
+      }
+   }
+
+   VG_(printf)("total UInstrs %u, total size %u\n", total_counts, total_size);
+}
+
 static void expandEmittedCode ( void )
 {
    Int    i;
-   UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size);
+   UChar *tmp = VG_(arena_malloc)(VG_AR_JITTER, 2 * emitted_code_size);
    /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */
    for (i = 0; i < emitted_code_size; i++)
       tmp[i] = emitted_code[i];
-   VG_(jitfree)(emitted_code);
+   VG_(arena_free)(VG_AR_JITTER, emitted_code);
    emitted_code = tmp;
    emitted_code_size *= 2;
 }
 
-static __inline__ void emitB ( UInt b )
+/* Local calls will be inlined, cross-module ones not */
+__inline__ void VG_(emitB) ( UInt b )
 {
    if (dis) {
       if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b);
@@ -95,29 +180,26 @@
    emitted_code_used++;
 }
 
-static __inline__ void emitW ( UInt l )
+__inline__ void VG_(emitW) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
 }
 
-static __inline__ void emitL ( UInt l )
+__inline__ void VG_(emitL) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
-   emitB ( (l >> 16) & 0x000000FF );
-   emitB ( (l >> 24) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l >> 16) & 0x000000FF );
+   VG_(emitB) ( (l >> 24) & 0x000000FF );
 }
 
-static __inline__ void newEmit ( void )
+__inline__ void VG_(newEmit) ( void )
 {
    if (dis)
       VG_(printf)("\t       %4d: ", emitted_code_used );
 }
 
-/* Is this a callee-save register, in the normal C calling convention?  */
-#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI)
-
 
 /*----------------------------------------------------*/
 /*--- Addressing modes                             ---*/
@@ -144,8 +226,8 @@
 static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg )
 {
    /* ($ADDR), reg */
-   emitB ( mkModRegRM(0, reg, 5) );
-   emitL ( addr );
+   VG_(emitB) ( mkModRegRM(0, reg, 5) );
+   VG_(emitL) ( addr );
 }
 
 static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg )
@@ -154,26 +236,26 @@
    if (regmem == R_ESP) 
       VG_(panic)("emit_amode_regmem_reg");
    if (regmem == R_EBP) {
-      emitB ( mkModRegRM(1, reg, 5) );
-      emitB ( 0x00 );
+      VG_(emitB) ( mkModRegRM(1, reg, 5) );
+      VG_(emitB) ( 0x00 );
    } else {
-      emitB( mkModRegRM(0, reg, regmem) );
+      VG_(emitB)( mkModRegRM(0, reg, regmem) );
    }
 }
 
-static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_amode_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
    if (regmem == R_ESP)
       VG_(panic)("emit_amode_offregmem_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a large offset */
       /* d32(regmem), reg */
-      emitB ( mkModRegRM(2, reg, regmem) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, regmem) );
+      VG_(emitL) ( off );
    } else {
       /* d8(regmem), reg */
-      emitB ( mkModRegRM(1, reg, regmem) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, regmem) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
@@ -184,27 +266,27 @@
       VG_(panic)("emit_amode_sib_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a 32-bit offset */
-      emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitL) ( off );
    } else {
       /* Use an 8-bit offset */
-      emitB ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
-static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg )
+void VG_(emit_amode_ereg_greg) ( Int e_reg, Int g_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 
@@ -285,23 +367,23 @@
 /*--- v-size (4, or 2 with OSO) insn emitters      ---*/
 /*----------------------------------------------------*/
 
-static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg )
+void VG_(emit_movv_offregmem_reg) ( Int sz, Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n", 
                    nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg )
+void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n", 
                    nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg));
@@ -309,9 +391,9 @@
 
 static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t(%s), %s\n",
@@ -320,40 +402,39 @@
 
 static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2));
 }
 
-static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 )
+void VG_(emit_movv_reg_reg) ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_ereg_greg ( reg2, reg1 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_ereg_greg) ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, %s\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc, 
-                                       UInt lit, Int reg )
+void VG_(emit_nonshiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) {
       /* short form OK */
-      emitB ( 0x83 ); /* Grp1 Ib,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      emitB ( lit & 0x000000FF );
+      VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      VG_(emitB) ( lit & 0x000000FF );
    } else {
-      emitB ( 0x81 ); /* Grp1 Iv,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      if (sz == 2) emitW ( lit ); else emitL ( lit );
+      VG_(emitB) ( 0x81 ); /* Grp1 Iv,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    }
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n", 
@@ -361,13 +442,13 @@
                    lit, nameIReg(sz,reg));
 }
 
-static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg )
+void VG_(emit_shiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xC1 ); /* Grp2 Ib,Ev */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xC1 ); /* Grp2 Ib,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz), 
@@ -376,12 +457,12 @@
 
 static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xD3 ); /* Grp2 CL,Ev */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xD3 ); /* Grp2 CL,Ev */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz) );
@@ -389,11 +470,11 @@
 
 static void emit_shiftopb_cl_stack0 ( Opcode opc )
 {
-   newEmit();
-   emitB ( 0xD2 ); /* Grp2 CL,Eb */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xD2 ); /* Grp2 CL,Eb */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(1) );
@@ -402,28 +483,28 @@
 static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz),
                    off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc, 
+void VG_(emit_nonshiftopv_reg_reg) ( Int sz, Opcode opc, 
                                        Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
 #  if 0
    /* Perfectly correct, but the GNU assembler uses the other form.
       Therefore we too use the other form, to aid verification. */
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
 #  else
-   emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
+   VG_(emitB) ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
    emit_amode_greg_ereg ( reg1, reg2 );
 #  endif
    if (dis)
@@ -432,134 +513,134 @@
                    nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_reg) ( Int sz, UInt lit, Int reg )
 {
    if (lit == 0) {
-      emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg );
+      VG_(emit_nonshiftopv_reg_reg) ( sz, XOR, reg, reg );
       return;
    }
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xB8+reg ); /* MOV imm, Gv */
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xB8+reg ); /* MOV imm, Gv */
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n", 
                    nameISize(sz), lit, nameIReg(sz,reg));
 }
 
-static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg )
+void VG_(emit_unaryopv_reg) ( Int sz, Opcode opc, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    switch (opc) {
       case NEG:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tneg%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case NOT:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnot%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case DEC:
-         emitB ( 0x48 + reg );
+         VG_(emitB) ( 0x48 + reg );
          if (dis)
             VG_(printf)( "\n\t\tdec%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case INC:
-         emitB ( 0x40 + reg );
+         VG_(emitB) ( 0x40 + reg );
          if (dis)
             VG_(printf)( "\n\t\tinc%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopv_reg");
+         VG_(panic)("VG_(emit_unaryopv_reg)");
    }
 }
 
-static void emit_pushv_reg ( Int sz, Int reg )
+void VG_(emit_pushv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x50 + reg );
+   VG_(emitB) ( 0x50 + reg );
    if (dis)
       VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_popv_reg ( Int sz, Int reg )
+void VG_(emit_popv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x58 + reg );
+   VG_(emitB) ( 0x58 + reg );
    if (dis)
       VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_pushl_lit8 ( Int lit8 )
+void VG_(emit_pushl_lit32) ( UInt int32 )
+{  
+   VG_(newEmit)();
+   VG_(emitB) ( 0x68 );
+   VG_(emitL) ( int32 );
+   if (dis)
+      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
+}  
+
+void VG_(emit_pushl_lit8) ( Int lit8 )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x6A );
-   emitB ( (UChar)((UInt)lit8) );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x6A );
+   VG_(emitB) ( (UChar)((UInt)lit8) );
    if (dis)
       VG_(printf)("\n\t\tpushl $%d\n", lit8 );
 }
 
-static void emit_pushl_lit32 ( UInt int32 )
+void VG_(emit_cmpl_zero_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x68 );
-   emitL ( int32 );
-   if (dis)
-      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
-}
-
-static void emit_cmpl_zero_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0x83 );
-   emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emit_amode_ereg_greg) ( reg, 7 /* Grp 3 opcode for CMP */ );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_ECX ( Int reg )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg, R_ECX );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, R_ECX );
    if (dis) 
       VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg));
 }
 
-static void emit_swapl_reg_EAX ( Int reg )
+void VG_(emit_swapl_reg_EAX) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x90 + reg ); /* XCHG Gv,eAX */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x90 + reg ); /* XCHG Gv,eAX */
    if (dis) 
       VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis) 
       VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), 
                   nameIReg(4,reg2));
@@ -567,65 +648,33 @@
 
 static void emit_bswapl_reg ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0xC8 + reg ); /* BSWAP r32 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0xC8 + reg ); /* BSWAP r32 */
    if (dis) 
       VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg));
 }
 
 static void emit_movl_reg_reg ( Int regs, Int regd )
 {
-   newEmit();
-   emitB ( 0x89 ); /* MOV Gv,Ev */
-   emit_amode_ereg_greg ( regd, regs );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x89 ); /* MOV Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( regd, regs );
    if (dis) 
       VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd));
 }
 
-static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_offregmem) ( Int sz, UInt lit, Int off, Int memreg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 );
+      VG_(emitB) ( 0x66 );
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $0x%x, %s\n", nameISize(sz), 
-                                            lit, nameIReg(sz,reg));
-}
-
-static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", 
-                  nameISize(sz), lit, off, nameIReg(4,reg) );
-}
-
-static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xC7 ); /* Grp11 Ev */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(emitB) ( 0xC7 ); /* Grp11 Ev */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", 
                    nameISize(sz), lit, off, nameIReg(4,memreg) );
@@ -638,35 +687,35 @@
 
 /* There is some doubt as to whether C6 (Grp 11) is in the
    486 insn set.  ToDo: investigate. */
-static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   emitB ( 0xC6 ); /* Grp11 Eb */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   emitB ( lit );
+void VG_(emit_movb_lit_offregmem) ( UInt lit, Int off, Int memreg )
+{                                     
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC6 ); /* Grp11 Eb */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   VG_(emitB) ( lit ); 
    if (dis)
       VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", 
                    lit, off, nameIReg(4,memreg) );
-}
-
+}              
+              
 static void emit_nonshiftopb_offregmem_reg ( Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), 
                    nameIReg(1,reg));
 }
 
-static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg )
+void VG_(emit_movb_reg_offregmem) ( Int reg, Int off, Int areg )
 {
    /* Could do better when reg == %al. */
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", 
                    nameIReg(1,reg), off, nameIReg(4,areg));
@@ -674,9 +723,9 @@
 
 static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t%s, %s\n", 
                    VG_(nameUOpcode)(False,opc),
@@ -685,8 +734,8 @@
 
 static void emit_movb_reg_regmem ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), 
@@ -695,10 +744,10 @@
 
 static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0x80 ); /* Grp1 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-   emitB ( lit & 0x000000FF );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x80 ); /* Grp1 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+   VG_(emitB) ( lit & 0x000000FF );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc),
                                              lit, nameIReg(1,reg));
@@ -706,69 +755,68 @@
 
 static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xC0 ); /* Grp2 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC0 ); /* Grp2 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc),
                    lit, nameIReg(1,reg));
 }
 
-static void emit_unaryopb_reg ( Opcode opc, Int reg )
+void VG_(emit_unaryopb_reg) ( Opcode opc, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    switch (opc) {
       case INC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(INC) );
          if (dis)
             VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg));
          break;
       case DEC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(DEC) );
          if (dis)
             VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg));
          break;
       case NOT:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg));
          break;
       case NEG:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopb_reg");
+         VG_(panic)("VG_(emit_unaryopb_reg)");
    }
 }
 
-static void emit_testb_lit_reg ( UInt lit, Int reg )
+void VG_(emit_testb_lit_reg) ( UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xF6 ); /* Grp3 Eb */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xF6 ); /* Grp3 Eb */
+   VG_(emit_amode_ereg_greg) ( reg, 0 /* Grp3 subopcode for TEST */ );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg));
 }
 
-
 /*----------------------------------------------------*/
 /*--- zero-extended load emitters                  ---*/
 /*----------------------------------------------------*/
 
-static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
-   emit_amode_offregmem_reg ( off, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", 
                    off, nameIReg(4,regmem), nameIReg(4,reg));
@@ -776,19 +824,19 @@
 
 static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), 
                                                nameIReg(4,reg2));
 }
 
-static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg )
+void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n",
                    off, nameIReg(4,areg), nameIReg(4,reg));
@@ -796,8 +844,8 @@
 
 static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), 
@@ -811,9 +859,9 @@
 static void emit_get_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
 }
@@ -821,9 +869,9 @@
 static void emit_put_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
 }
@@ -831,9 +879,9 @@
 static void emit_fpu_no_mem ( UChar first_byte, 
                               UChar second_byte )
 {
-   newEmit();
-   emitB ( first_byte );
-   emitB ( second_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
+   VG_(emitB) ( second_byte );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte );
@@ -843,8 +891,8 @@
                               UChar second_byte_masked, 
                               Int reg )
 {
-   newEmit();
-   emitB ( first_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
    emit_amode_regmem_reg ( reg, second_byte_masked >> 3 );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", 
@@ -857,27 +905,26 @@
 /*--- misc instruction emitters                    ---*/
 /*----------------------------------------------------*/
 
-static void emit_call_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0xFF ); /* Grp5 */
-   emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) );
-   if (dis)
+void VG_(emit_call_reg) ( Int reg )
+{           
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* Grp5 */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp5opcode(CALLM) );
+   if (dis) 
       VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) );
-}
-
-
+}              
+         
 static void emit_call_star_EBP_off ( Int byte_off )
 {
-  newEmit();
+  VG_(newEmit)();
   if (byte_off < -128 || byte_off > 127) {
-     emitB ( 0xFF );
-     emitB ( 0x95 );
-     emitL ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x95 );
+     VG_(emitL) ( byte_off );
   } else {
-     emitB ( 0xFF );
-     emitB ( 0x55 );
-     emitB ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x55 );
+     VG_(emitB) ( byte_off );
   }
   if (dis)
      VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off );
@@ -887,24 +934,24 @@
 static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x83 ); /* Grp1 Ib,Ev */
-   emit_amode_offregmem_reg ( off, regmem, 
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, 
                               0 /* Grp1 subopcode for ADD */ );
-   emitB ( lit8 & 0xFF );
+   VG_(emitB) ( lit8 & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, 
                                                nameIReg(4,regmem));
 }
 
 
-static void emit_add_lit_to_esp ( Int lit )
+void VG_(emit_add_lit_to_esp) ( Int lit )
 {
-   if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp");
-   newEmit();
-   emitB ( 0x83 );
-   emitB ( 0xC4 );
-   emitB ( lit & 0xFF );
+   if (lit < -128 || lit > 127) VG_(panic)("VG_(emit_add_lit_to_esp)");
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emitB) ( 0xC4 );
+   VG_(emitB) ( lit & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit );
 }
@@ -914,11 +961,11 @@
 {
    /* movb %al, 0(%esp) */
    /* 88442400              movb    %al, 0(%esp) */
-   newEmit();
-   emitB ( 0x88 );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" );
 }
@@ -927,11 +974,11 @@
 {
    /* movb 0(%esp), %al */
    /* 8A442400              movb    0(%esp), %al */
-   newEmit();
-   emitB ( 0x8A );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8A );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" );
 }
@@ -940,12 +987,12 @@
 /* Emit a jump short with an 8-bit signed offset.  Note that the
    offset is that which should be added to %eip once %eip has been
    advanced over this insn.  */
-static void emit_jcondshort_delta ( Condcode cond, Int delta )
+void VG_(emit_jcondshort_delta) ( Condcode cond, Int delta )
 {
    vg_assert(delta >= -128 && delta <= 127);
-   newEmit();
-   emitB ( 0x70 + (UInt)cond );
-   emitB ( (UChar)delta );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x70 + (UInt)cond );
+   VG_(emitB) ( (UChar)delta );
    if (dis)
       VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", 
                    VG_(nameCondcode)(cond), delta );
@@ -955,11 +1002,11 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0xFF ); /* PUSHL off(%ebp) */
-   emitB ( 0x75 );
-   emitB ( off );
-   emitB ( 0x9D ); /* POPFL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* PUSHL off(%ebp) */
+   VG_(emitB) ( 0x75 );
+   VG_(emitB) ( off );
+   VG_(emitB) ( 0x9D ); /* POPFL */
    if (dis)
       VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off );
 }
@@ -968,20 +1015,20 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0x9C ); /* PUSHFL */
-   emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */
-   emitB ( 0x45 );
-   emitB ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x9C ); /* PUSHFL */
+   VG_(emitB) ( 0x8F ); /* POPL vg_m_state.m_eflags */
+   VG_(emitB) ( 0x45 );
+   VG_(emitB) ( off );
    if (dis)
       VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off );
 }
 
 static void emit_setb_reg ( Int reg, Condcode cond )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond );
-   emit_amode_ereg_greg ( reg, 0 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0x90 + (UChar)cond );
+   VG_(emit_amode_ereg_greg) ( reg, 0 );
    if (dis)
       VG_(printf)("\n\t\tset%s %s\n", 
                   VG_(nameCondcode)(cond), nameIReg(1,reg));
@@ -989,33 +1036,33 @@
 
 static void emit_ret ( void )
 {
-   newEmit();
-   emitB ( 0xC3 ); /* RET */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC3 ); /* RET */
    if (dis)
       VG_(printf)("\n\t\tret\n");
 }
 
-static void emit_pushal ( void )
+void VG_(emit_pushal) ( void )
 {
-   newEmit();
-   emitB ( 0x60 ); /* PUSHAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x60 ); /* PUSHAL */
    if (dis)
       VG_(printf)("\n\t\tpushal\n");
 }
 
-static void emit_popal ( void )
+void VG_(emit_popal) ( void )
 {
-   newEmit();
-   emitB ( 0x61 ); /* POPAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x61 ); /* POPAL */
    if (dis)
       VG_(printf)("\n\t\tpopal\n");
 }
 
 static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
-   emit_amode_offregmem_reg ( (Int)lit, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
+   VG_(emit_amode_offregmem_reg) ( (Int)lit, regmem, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s), %s\n",
                   lit, nameIReg(4,regmem), nameIReg(4,reg) );
@@ -1024,8 +1071,8 @@
 static void emit_lea_sib_reg ( UInt lit, Int scale,
 			       Int regbase, Int regindex, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
    emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n",
@@ -1034,17 +1081,51 @@
                        nameIReg(4,reg) );
 }
 
-static void emit_AMD_prefetch_reg ( Int reg )
+void VG_(emit_AMD_prefetch_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0x0D );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0x0D );
    emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ );
    if (dis)
       VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) );
 }
 
 /*----------------------------------------------------*/
+/*--- Helper offset -> addr translation            ---*/
+/*----------------------------------------------------*/
+
+/* Finds the baseBlock offset of a skin-specified helper.
+ * Searches through compacts first, then non-compacts. */
+Int VG_(helper_offset)(Addr a)
+{
+   Int i;
+
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      if (VG_(compact_helper_addrs)[i] == a)
+         return VG_(compact_helper_offsets)[i];
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      if (VG_(noncompact_helper_addrs)[i] == a)
+         return VG_(noncompact_helper_offsets)[i];
+
+   /* Shouldn't get here */
+   VG_(printf)(
+      "\nCouldn't find offset of helper from its address (%p).\n"
+      "A helper function probably used hasn't been registered?\n\n", a);
+
+   VG_(printf)("      compact helpers: ");
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      VG_(printf)("%p ", VG_(compact_helper_addrs)[i]);
+
+   VG_(printf)("\n  non-compact helpers: ");
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      VG_(printf)("%p ", VG_(noncompact_helper_addrs)[i]);
+
+   VG_(printf)("\n");
+   VG_(skin_error)("Unfound helper");
+}
+
+/*----------------------------------------------------*/
 /*--- Instruction synthesisers                     ---*/
 /*----------------------------------------------------*/
 
@@ -1057,8 +1138,7 @@
 /* Synthesise a call to *baseBlock[offset], ie,
    call * (4 x offset)(%ebp).
 */
-static void synth_call_baseBlock_method ( Bool ensure_shortform, 
-                                          Int word_offset )
+void VG_(synth_call) ( Bool ensure_shortform, Int word_offset )
 {
    vg_assert(word_offset >= 0);
    vg_assert(word_offset < VG_BASEBLOCK_WORDS);
@@ -1067,42 +1147,237 @@
    emit_call_star_EBP_off ( 4 * word_offset );
 }
 
-static void synth_ccall_saveRegs ( void )
+static void maybe_emit_movl_reg_reg ( UInt src, UInt dst )
 {
-   emit_pushv_reg ( 4, R_EAX ); 
-   emit_pushv_reg ( 4, R_ECX ); 
-   emit_pushv_reg ( 4, R_EDX ); 
+   if (src != dst) {
+      VG_(emit_movv_reg_reg) ( 4, src, dst );
+      ccall_arg_setup_instrs++;
+   }
 }
+
+/* 'maybe' because it is sometimes skipped eg. for "movl %eax,%eax" */
+static void maybe_emit_movl_litOrReg_reg ( UInt litOrReg, Tag tag, UInt reg )
+{
+   if (RealReg == tag) {
+      maybe_emit_movl_reg_reg ( litOrReg, reg );
+   } else if (Literal == tag) {
+      VG_(emit_movv_lit_reg) ( 4, litOrReg, reg );
+      ccall_arg_setup_instrs++;
+   }
+   else
+      VG_(panic)("emit_movl_litOrReg_reg: unexpected tag");
+}
+
+static
+void emit_swapl_arg_regs ( UInt reg1, UInt reg2 )
+{
+   if        (R_EAX == reg1) {
+      VG_(emit_swapl_reg_EAX) ( reg2 );
+   } else if (R_EAX == reg2) {
+      VG_(emit_swapl_reg_EAX) ( reg1 );
+   } else {
+      emit_swapl_reg_reg ( reg1, reg2 );
+   }
+   ccall_arg_setup_instrs++;
+}
+
+static
+void emit_two_regs_args_setup ( UInt src1, UInt src2, UInt dst1, UInt dst2)
+{
+   if        (dst1 != src2) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+
+   } else if (dst2 != src1) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+
+   } else {
+      /* swap to break cycle */
+      emit_swapl_arg_regs ( dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_args_setup ( UInt src1, UInt src2, UInt src3,
+                                  UInt dst1, UInt dst2, UInt dst3)
+{
+   if        (dst1 != src2 && dst1 != src3) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      emit_two_regs_args_setup ( src2, src3, dst2, dst3 );
+
+   } else if (dst2 != src1 && dst2 != src3) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      emit_two_regs_args_setup ( src1, src3, dst1, dst3 );
+
+   } else if (dst3 != src1 && dst3 != src2) {
+      maybe_emit_movl_reg_reg ( src3, dst3 );
+      emit_two_regs_args_setup ( src1, src2, dst1, dst2 );
+      
+   } else {
+      /* break cycle */
+      if        (dst1 == src2 && dst2 == src3 && dst3 == src1) {
+         emit_swapl_arg_regs ( dst1, dst2 );
+         emit_swapl_arg_regs ( dst1, dst3 );
+
+      } else if (dst1 == src3 && dst2 == src1 && dst3 == src2) {
+         emit_swapl_arg_regs ( dst1, dst3 );
+         emit_swapl_arg_regs ( dst1, dst2 );
+
+      } else {
+         VG_(panic)("impossible 3-cycle");
+      }
+   }
+}
+
+static
+void emit_two_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                        UInt src1, UInt src2,
+                                        UInt dst1, UInt dst2)
+{
+   /* If either are lits, order doesn't matter */
+   if (Literal == tagv[src1] || Literal == tagv[src2]) {
+      maybe_emit_movl_litOrReg_reg ( argv[src1], tagv[src1], dst1 );
+      maybe_emit_movl_litOrReg_reg ( argv[src2], tagv[src2], dst2 );
+
+   } else {
+      emit_two_regs_args_setup ( argv[src1], argv[src2], dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                          UInt src1, UInt src2, UInt src3,
+                                          UInt dst1, UInt dst2, UInt dst3)
+{
+   // SSS: fix this eventually -- make STOREV use two RealRegs?
+   /* Not supporting literals for 3-arg C functions -- they're only used
+      by STOREV which has 2 args */
+   vg_assert(RealReg == tagv[src1] &&
+             RealReg == tagv[src2] &&
+             RealReg == tagv[src3]);
+   emit_three_regs_args_setup ( argv[src1], argv[src2], argv[src3],
+                                dst1, dst2, dst3 );
+}
+
+/* Synthesise a call to a C function `fn' (which must be registered in
+   baseBlock) doing all the reg saving and arg handling work.
+ 
+   WARNING:  a UInstr should *not* be translated with synth_ccall followed
+   by some other x86 assembly code;  vg_liveness_analysis() doesn't expect
+   such behaviour and everything will fall over.
+ */
+void VG_(synth_ccall) ( Addr fn, Int argc, Int regparms_n, UInt argv[],
+                        Tag tagv[], Int ret_reg,
+                        RRegSet regs_live_before, RRegSet regs_live_after )
+{
+   Int  i;
+   Int  stack_used = 0;
+   Bool preserve_eax, preserve_ecx, preserve_edx;
+
+   vg_assert(0 <= regparms_n && regparms_n <= 3);
+
+   ccalls++;
+
+   /* If %e[acd]x is live before and after the C call, save/restore it.
+      Unless the return values clobbers the reg;  in this case we must not
+      save/restore the reg, because the restore would clobber the return
+      value.  (Before and after the UInstr really constitute separate live
+      ranges, but you miss this if you don't consider what happens during
+      the UInstr.) */
+#  define PRESERVE_REG(realReg)   \
+   (IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_before) &&   \
+    IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_after)  &&   \
+    ret_reg != realReg)
+
+   preserve_eax = PRESERVE_REG(R_EAX);
+   preserve_ecx = PRESERVE_REG(R_ECX);
+   preserve_edx = PRESERVE_REG(R_EDX);
+
+#  undef PRESERVE_REG
+
+   /* Save caller-save regs as required */
+   if (preserve_eax) { VG_(emit_pushv_reg) ( 4, R_EAX ); ccall_reg_saves++; }
+   if (preserve_ecx) { VG_(emit_pushv_reg) ( 4, R_ECX ); ccall_reg_saves++; }
+   if (preserve_edx) { VG_(emit_pushv_reg) ( 4, R_EDX ); ccall_reg_saves++; }
+
+   /* Args are passed in two groups: (a) via stack (b) via regs.  regparms_n
+      is the number of args passed in regs (maximum 3 for GCC on x86). */
+
+   ccall_args += argc;
    
-static void synth_ccall_pushOneArg ( Int r1 )
-{
-   emit_pushv_reg ( 4, r1 );
-}
+   /* First push stack args (RealRegs or Literals) in reverse order. */
+   for (i = argc-1; i >= regparms_n; i--) {
+      switch (tagv[i]) {
+      case RealReg:
+         VG_(emit_pushv_reg) ( 4, argv[i] );
+         break;
+      case Literal:
+         /* Use short form of pushl if possible. */
+         if (argv[i] == VG_(extend_s_8to32) ( argv[i] ))
+            VG_(emit_pushl_lit8) ( VG_(extend_s_8to32)(argv[i]) );
+         else
+            VG_(emit_pushl_lit32)( argv[i] );
+         break;
+      default:
+         VG_(printf)("tag=%d\n", tagv[i]);
+         VG_(panic)("VG_(synth_ccall): bad tag");
+      }
+      stack_used += 4;
+      ccall_arg_setup_instrs++;
+   }
 
-static void synth_ccall_pushTwoArgs ( Int r1, Int r2 )
-{
-   /* must push in reverse order */
-   emit_pushv_reg ( 4, r2 );
-   emit_pushv_reg ( 4, r1 );
-}
+   /* Then setup args in registers (arg[123] --> %e[adc]x;  note order!).
+      If moving values between registers, be careful not to clobber any on
+      the way.  Happily we can use xchgl to swap registers.
+   */
+   switch (regparms_n) {
 
-/* Synthesise a call to *baseBlock[offset], ie,
-   call * (4 x offset)(%ebp) with arguments
-*/
-static void synth_ccall_call_clearStack_restoreRegs ( Int word_offset, 
-                                                      UInt n_args_bytes )
-{
-   vg_assert(word_offset >= 0);
-   vg_assert(word_offset < VG_BASEBLOCK_WORDS);
-   vg_assert(n_args_bytes <= 12);           /* Max 3 word-sized args */
-   vg_assert(0 == (n_args_bytes & 0x3));    /* Divisible by four */
+   /* Trickiest.  Args passed in %eax, %edx, and %ecx. */
+   case 3:
+      emit_three_regs_or_lits_args_setup ( argv, tagv, 0, 1, 2,
+                                           R_EAX, R_EDX, R_ECX );
+      break;
 
-   emit_call_star_EBP_off ( 4 * word_offset );
-   if ( 0 != n_args_bytes )
-      emit_add_lit_to_esp ( n_args_bytes );
-   emit_popv_reg ( 4, R_EDX ); 
-   emit_popv_reg ( 4, R_ECX ); 
-   emit_popv_reg ( 4, R_EAX ); 
+   /* Less-tricky.  Args passed in %eax and %edx. */
+   case 2:
+      emit_two_regs_or_lits_args_setup ( argv, tagv, 0, 1, R_EAX, R_EDX );
+      break;
+      
+   /* Easy.  Just move arg1 into %eax (if not already in there). */
+   case 1:  
+      maybe_emit_movl_litOrReg_reg ( argv[0], tagv[0], R_EAX );
+      break;
+
+   case 0:
+      break;
+
+   default:
+      VG_(panic)("VG_(synth_call): regparms_n value not in range 0..3");
+   }
+   
+   /* Call the function */
+   VG_(synth_call) ( False, VG_(helper_offset) ( fn ) );
+
+   /* Clear any args from stack */
+   if (0 != stack_used) {
+      VG_(emit_add_lit_to_esp) ( stack_used );
+      ccall_stack_clears++;
+   }
+
+   /* Move return value into ret_reg if necessary and not already there */
+   if (INVALID_REALREG != ret_reg) {
+      ccall_retvals++;
+      if (R_EAX != ret_reg) {
+         VG_(emit_movv_reg_reg) ( 4, R_EAX, ret_reg );
+         ccall_retval_movs++;
+      }
+   }
+
+   /* Restore live caller-save regs as required */
+   if (preserve_edx) VG_(emit_popv_reg) ( 4, R_EDX ); 
+   if (preserve_ecx) VG_(emit_popv_reg) ( 4, R_ECX ); 
+   if (preserve_eax) VG_(emit_popv_reg) ( 4, R_EAX ); 
 }
 
 static void load_ebp_from_JmpKind ( JmpKind jmpkind )
@@ -1110,15 +1385,15 @@
    switch (jmpkind) {
       case JmpBoring: 
          break;
-      case JmpCall:
       case JmpRet: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP );
+         break;
+      case JmpCall:
          break;
       case JmpSyscall: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
          break;
       case JmpClientReq: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
          break;
       default: 
          VG_(panic)("load_ebp_from_JmpKind");
@@ -1133,7 +1408,7 @@
 {
    load_ebp_from_JmpKind ( jmpkind );
    if (reg != R_EAX)
-      emit_movv_reg_reg ( 4, reg, R_EAX );
+      VG_(emit_movv_reg_reg) ( 4, reg, R_EAX );
    emit_ret();
 }
 
@@ -1142,7 +1417,7 @@
 static void synth_jmp_lit ( Addr addr, JmpKind jmpkind )
 {
    load_ebp_from_JmpKind ( jmpkind );
-   emit_movv_lit_reg ( 4, addr, R_EAX );
+   VG_(emit_movv_lit_reg) ( 4, addr, R_EAX );
    emit_ret();
 }
 
@@ -1163,7 +1438,7 @@
    6                    xyxyxy:
   */
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 5+1 );
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1176,8 +1451,8 @@
       000a C3                    ret
       next:
    */
-   emit_cmpl_zero_reg ( reg );
-   emit_jcondshort_delta ( CondNZ, 5+1 );
+   VG_(emit_cmpl_zero_reg) ( reg );
+   VG_(emit_jcondshort_delta) ( CondNZ, 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1186,7 +1461,7 @@
 {
    /* Load the zero-extended literal into reg, at size l,
       regardless of the request size. */
-   emit_movv_lit_reg ( 4, lit, reg );
+   VG_(emit_movv_lit_reg) ( 4, lit, reg );
 }
 
 
@@ -1204,9 +1479,9 @@
 static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) 
 {
    switch (size) {
-      case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break;
-      case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break;
-      case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break;
+      case 4: VG_(emit_movv_offregmem_reg) ( 4, off, areg, reg ); break;
+      case 2: VG_(emit_movzwl_offregmem_reg) ( off, areg, reg ); break;
+      case 1: VG_(emit_movzbl_offregmem_reg) ( off, areg, reg ); break;
       default: VG_(panic)("synth_mov_offregmem_reg");
    }  
 }
@@ -1216,15 +1491,15 @@
                                       Int off, Int areg )
 {
    switch (size) {
-      case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break;
-      case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break;
+      case 4: VG_(emit_movv_reg_offregmem) ( 4, reg, off, areg ); break;
+      case 2: VG_(emit_movv_reg_offregmem) ( 2, reg, off, areg ); break;
       case 1: if (reg < 4) {
-                 emit_movb_reg_offregmem ( reg, off, areg ); 
+                 VG_(emit_movb_reg_offregmem) ( reg, off, areg ); 
               }
               else {
-                 emit_swapl_reg_EAX ( reg );
-                 emit_movb_reg_offregmem ( R_AL, off, areg );
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
+                 VG_(emit_movb_reg_offregmem) ( R_AL, off, areg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_mov_reg_offregmem");
@@ -1261,23 +1536,23 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 4, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 4, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 2, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 2, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, reg );
+                 VG_(emit_unaryopb_reg) ( opcode, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, R_AL );
+                 VG_(emit_unaryopb_reg) ( opcode, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_unaryop_reg");
@@ -1293,11 +1568,11 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 4, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 2, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: { /* Horrible ... */
@@ -1377,11 +1652,11 @@
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg );
             if (wr_cc) emit_put_eflags();
          } else {
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
             if (rd_cc) emit_get_eflags();
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL );
             if (wr_cc) emit_put_eflags();
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
          }
          break;
       default: 
@@ -1396,11 +1671,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1408,11 +1683,11 @@
                  emit_nonshiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_nonshiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_nonshiftop_lit_reg");
@@ -1424,19 +1699,19 @@
 {
    switch (size) {
       case 4: 
-         emit_pushv_reg ( 4, reg ); 
+         VG_(emit_pushv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_pushv_reg ( 2, reg ); 
+         VG_(emit_pushv_reg) ( 2, reg ); 
          break;
       /* Pray that we don't have to generate this really cruddy bit of
          code very often.  Could do better, but can I be bothered? */
       case 1: 
          vg_assert(reg != R_ESP); /* duh */
-         emit_add_lit_to_esp(-1);
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         VG_(emit_add_lit_to_esp)(-1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_AL_zeroESPmem();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          break;
      default: 
          VG_(panic)("synth_push_reg");
@@ -1448,18 +1723,18 @@
 {
    switch (size) {
       case 4: 
-         emit_popv_reg ( 4, reg ); 
+         VG_(emit_popv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_popv_reg ( 2, reg ); 
+         VG_(emit_popv_reg) ( 2, reg ); 
          break;
       case 1:
          /* Same comment as above applies. */
          vg_assert(reg != R_ESP); /* duh */
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_zeroESPmem_AL();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
-         emit_add_lit_to_esp(1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
+         VG_(emit_add_lit_to_esp)(1);
          break;
       default: VG_(panic)("synth_pop_reg");
    }
@@ -1491,11 +1766,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1503,11 +1778,11 @@
                  emit_shiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_shiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_shiftop_lit_reg");
@@ -1521,9 +1796,9 @@
    if (reg < 4) {
       emit_setb_reg ( reg, cond );
    } else {
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
       emit_setb_reg ( R_AL, cond );
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
    }
 }
 
@@ -1555,42 +1830,18 @@
 static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst )
 {
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 
                            2 /* length of the next insn */ );
    emit_movl_reg_reg ( src, dst );
 }
 
 
-/* Synthesise a minimal test (and which discards result) of reg32
-   against lit.  It's always safe do simply
-      emit_testv_lit_reg ( 4, lit, reg32 )
-   but we try to do better when possible.
-*/
-static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 )
-{
-   if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) {
-      /* We can get away with a byte insn. */
-      emit_testb_lit_reg ( lit, reg32 );
-   }
-   else 
-   if ((lit & 0xFFFF0000) == 0) {
-      /* Literal fits in 16 bits; do a word insn. */
-      emit_testv_lit_reg ( 2, lit, reg32 );
-   }
-   else {
-      /* Totally general ... */
-      emit_testv_lit_reg ( 4, lit, reg32 );
-   }
-}
-
-
 /*----------------------------------------------------*/
 /*--- Top level of the uinstr -> x86 translation.  ---*/
 /*----------------------------------------------------*/
 
 /* Return the byte offset from %ebp (ie, into baseBlock)
    for the specified ArchReg or SpillNo. */
-
 static Int spillOrArchOffset ( Int size, Tag tag, UInt value )
 {
    if (tag == SpillNo) {
@@ -1621,14 +1872,15 @@
    VG_(panic)("spillOrArchOffset");
 }
 
-
 static Int eflagsOffset ( void )
 {
    return 4 * VGOFF_(m_eflags);
 }
 
 
-static Int shadowOffset ( Int arch )
+/* Return the byte offset from %ebp (ie, into baseBlock)
+   for the specified shadow register */
+Int VG_(shadowRegOffset) ( Int arch )
 {
    switch (arch) {
       case R_EAX: return 4 * VGOFF_(sh_eax);
@@ -1643,539 +1895,44 @@
    }
 }
 
-
-static Int shadowFlagsOffset ( void )
+Int VG_(shadowFlagsOffset) ( void )
 {
    return 4 * VGOFF_(sh_eflags);
 }
 
 
-static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg )
-{
-   Int i, j, helper_offw;
-   Int pushed[VG_MAX_REALREGS+2];
-   Int n_pushed;
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_LOADV4); break;
-      case 2: helper_offw = VGOFF_(helperc_LOADV2); break;
-      case 1: helper_offw = VGOFF_(helperc_LOADV1); break;
-      default: VG_(panic)("synth_LOADV");
-   }
-   n_pushed = 0;
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if (j == tv_reg || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-      pushed[n_pushed++] = j;
-   }
-   emit_pushv_reg ( 4, a_reg );
-   pushed[n_pushed++] = a_reg;
-   vg_assert(n_pushed <= VG_MAX_REALREGS+1);
-
-   synth_call_baseBlock_method ( False, helper_offw );
-   /* Result is in %eax; we need to get it to tv_reg. */
-   if (tv_reg != R_EAX)
-      emit_movv_reg_reg ( 4, R_EAX, tv_reg );
-
-   while (n_pushed > 0) {
-      n_pushed--;
-      if (pushed[n_pushed] == tv_reg) {
-         emit_add_lit_to_esp ( 4 );
-      } else {
-         emit_popv_reg ( 4, pushed[n_pushed] );
-      }
-   }
-}
-
-
-static void synth_STOREV ( Int sz,
-                           Int tv_tag, Int tv_val,
-                           Int a_reg )
-{
-   Int i, j, helper_offw;
-   vg_assert(tv_tag == RealReg || tv_tag == Literal);
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_STOREV4); break;
-      case 2: helper_offw = VGOFF_(helperc_STOREV2); break;
-      case 1: helper_offw = VGOFF_(helperc_STOREV1); break;
-      default: VG_(panic)("synth_STOREV");
-   }
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-   }
-   if (tv_tag == RealReg) {
-      emit_pushv_reg ( 4, tv_val );
-   } else {
-     if (tv_val == VG_(extend_s_8to32)(tv_val))
-        emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) );
-     else
-        emit_pushl_lit32(tv_val);
-   }
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_popv_reg ( 4, a_reg );
-   if (tv_tag == RealReg) {
-      emit_popv_reg ( 4, tv_val );
-   } else {
-      emit_add_lit_to_esp ( 4 );
-   }
-   for (i = VG_MAX_REALREGS-1; i >= 0; i--) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_popv_reg ( 4, j );
-   }
-}
-
 
 static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg )
 {
    if (sz_src == 1 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 24, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 24, reg );
    }
    else if (sz_src == 2 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 16, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 16, reg );
    }
    else if (sz_src == 1 && sz_dst == 2) {
-      emit_shiftopv_lit_reg ( 2, SHL, 8, reg );
-      emit_shiftopv_lit_reg ( 2, SAR, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SHL, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SAR, 8, reg );
    }
    else
       VG_(panic)("synth_WIDEN");
 }
 
 
-static void synth_SETV ( Int sz, Int reg )
+static void synth_handle_esp_assignment ( Int i, Int reg,
+                                          RRegSet regs_live_before,
+                                          RRegSet regs_live_after )
 {
-   UInt val;
-   switch (sz) {
-      case 4: val = 0x00000000; break;
-      case 2: val = 0xFFFF0000; break;
-      case 1: val = 0xFFFFFF00; break;
-      case 0: val = 0xFFFFFFFE; break;
-      default: VG_(panic)("synth_SETV");
-   }
-   emit_movv_lit_reg ( 4, val, reg );
+   UInt argv[] = { reg };
+   Tag  tagv[] = { RealReg };
+
+   VG_(synth_ccall) ( (Addr) VG_(handle_esp_assignment), 1, 1, argv, tagv, 
+                      INVALID_REALREG, regs_live_before, regs_live_after);
 }
 
 
-static void synth_TESTV ( Int sz, Int tag, Int val )
-{
-   vg_assert(tag == ArchReg || tag == RealReg);
-   if (tag == ArchReg) {
-      switch (sz) {
-         case 4: 
-            emit_testv_lit_offregmem ( 
-               4, 0xFFFFFFFF, shadowOffset(val), R_EBP );
-            break;
-         case 2: 
-            emit_testv_lit_offregmem ( 
-               4, 0x0000FFFF, shadowOffset(val), R_EBP );
-            break;
-         case 1:
-            if (val < 4) {
-               emit_testv_lit_offregmem ( 
-                  4, 0x000000FF, shadowOffset(val), R_EBP );
-            } else {
-               emit_testv_lit_offregmem ( 
-                  4, 0x0000FF00, shadowOffset(val-4), R_EBP );
-            }
-            break;
-         case 0: 
-            /* should never happen */
-         default: 
-            VG_(panic)("synth_TESTV(ArchReg)");
-      }
-   } else {
-      switch (sz) {
-         case 4:
-            /* Works, but holds the entire 32-bit literal, hence
-               generating a 6-byte insn.  We want to know if any bits
-               in the reg are set, but since this is for the full reg,
-               we might as well compare it against zero, which can be
-               done with a shorter insn. */
-            /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */
-            emit_cmpl_zero_reg ( val );
-            break;
-         case 2:
-            synth_minimal_test_lit_reg ( 0x0000FFFF, val );
-            break;
-         case 1:
-            synth_minimal_test_lit_reg ( 0x000000FF, val );
-            break;
-         case 0:
-            synth_minimal_test_lit_reg ( 0x00000001, val );
-            break;
-         default: 
-            VG_(panic)("synth_TESTV(RealReg)");
-      }
-   }
-   emit_jcondshort_delta ( CondZ, 3 );
-   synth_call_baseBlock_method (
-      True, /* needed to guarantee that this insn is indeed 3 bytes long */
-      (sz==4 ? VGOFF_(helper_value_check4_fail)
-             : (sz==2 ? VGOFF_(helper_value_check2_fail)
-                      : sz == 1 ? VGOFF_(helper_value_check1_fail)
-                                : VGOFF_(helper_value_check0_fail)))
-   );
-}
-
-
-static void synth_GETV ( Int sz, Int arch, Int reg )
-{
-   /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */
-   switch (sz) {
-      case 4: 
-         emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg );
-         break;
-      case 2: 
-         emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg );
-         break;
-      case 1: 
-         if (arch < 4) {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         } else {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg );
-         }
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg );
-         break;
-      default: 
-         VG_(panic)("synth_GETV");
-   }
-}
-
-
-static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch )
-{
-   if (srcTag == Literal) {
-     /* PUTV with a Literal is only ever used to set the corresponding
-        ArchReg to `all valid'.  Should really be a kind of SETV. */
-      UInt lit = lit_or_reg;
-      switch (sz) {
-         case 4:
-            vg_assert(lit == 0x00000000);
-            emit_movv_lit_offregmem ( 4, 0x00000000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            vg_assert(lit == 0xFFFF0000);
-            emit_movv_lit_offregmem ( 2, 0x0000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            vg_assert(lit == 0xFFFFFF00);
-            if (arch < 4) {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch), R_EBP );
-            } else {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(lit)");
-      }
-
-   } else {
-
-      UInt reg;
-      vg_assert(srcTag == RealReg);
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-         reg = R_EAX;
-      } else {
-         reg = lit_or_reg;
-      }
-
-      if (sz == 1) vg_assert(reg < 4);
-
-      switch (sz) {
-         case 4:
-            emit_movv_reg_offregmem ( 4, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            emit_movv_reg_offregmem ( 2, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            if (arch < 4) {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch), R_EBP );
-	    } else {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(reg)");
-      }
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-      }
-   }
-}
-
-
-static void synth_GETVF ( Int reg )
-{
-   emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg );
-   /* paranoia only; should be unnecessary ... */
-   /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */
-}
-
-
-static void synth_PUTVF ( UInt reg )
-{
-   emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP );
-}
-
-
-static void synth_handle_esp_assignment ( Int reg )
-{
-   emit_pushal();
-   emit_pushv_reg ( 4, reg );
-   synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-
-
-static void synth_fpu_mem_check_actions ( Bool isWrite, 
-                                          Int size, Int a_reg )
-{
-   Int helper_offw
-     = isWrite ? VGOFF_(fpu_write_check)
-               : VGOFF_(fpu_read_check);
-   emit_pushal();
-   emit_pushl_lit8 ( size );
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_add_lit_to_esp ( 8 );   
-   emit_popal();
-}
-
-
-#if 0
-/* FixMe.  Useful for debugging. */
-void VG_(oink) ( Int n )
-{
-   VG_(printf)("OiNk(%d): ", n );
-   VG_(show_reg_tags)( &VG_(m_shadow) );
-}
-
-static void synth_OINK ( Int n )
-{
-   emit_pushal();
-   emit_movv_lit_reg ( 4, n, R_EBP );
-   emit_pushl_reg ( R_EBP );
-   emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP );
-   emit_call_reg ( R_EBP );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-#endif
-
-static void synth_TAG1_op ( VgTagOp op, Int reg )
-{
-   switch (op) {
-
-      /* Scheme is
-            neg<sz> %reg          -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            or 0xFFFFFFFE, %reg   -- invalidate all bits except lowest
-      */
-      case VgT_PCast40:
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast20:
-         emit_unaryopv_reg(2, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast10:
-         if (reg >= 4) {
-            emit_swapl_reg_EAX(reg);
-            emit_unaryopb_reg(NEG, R_EAX);
-            emit_swapl_reg_EAX(reg);
-         } else {
-            emit_unaryopb_reg(NEG, reg);
-         }
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-
-      /* Scheme is
-            andl $1, %reg -- %reg is 0 or 1
-            negl %reg -- %reg is 0 or 0xFFFFFFFF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast04:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         break;
-      case VgT_PCast02:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast01:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* Scheme is
-            shl $24, %reg -- make irrelevant bits disappear
-            negl %reg             -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         break;
-      case VgT_PCast12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast11:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* We steal %ebp (a non-allocable reg) as a temporary:
-            pushl %ebp
-            movl %reg, %ebp
-            negl %ebp
-            orl %ebp, %reg
-            popl %ebp
-         This sequence turns out to be correct regardless of the 
-         operation width.
-      */
-      case VgT_Left4:
-      case VgT_Left2:
-      case VgT_Left1:
-         vg_assert(reg != R_EDI);
-         emit_movv_reg_reg(4, reg, R_EDI);
-         emit_unaryopv_reg(4, NEG, R_EDI);
-         emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg);
-         break;
-
-      /* These are all fairly obvious; do the op and then, if
-         necessary, invalidate unused bits. */
-      case VgT_SWiden14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         break;
-      case VgT_SWiden24:
-         emit_shiftopv_lit_reg(4, SHL, 16, reg);
-         emit_shiftopv_lit_reg(4, SAR, 16, reg);
-         break;
-      case VgT_SWiden12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_ZWiden14:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         break;
-      case VgT_ZWiden24:
-         emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg);
-         break;
-      case VgT_ZWiden12:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG1_op");
-   }
-}
-
-
-static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd )
-{
-   switch (op) {
-
-      /* UifU is implemented by OR, since 1 means Undefined. */
-      case VgT_UifU4:
-      case VgT_UifU2:
-      case VgT_UifU1:
-      case VgT_UifU0:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-
-      /* DifD is implemented by AND, since 0 means Defined. */
-      case VgT_DifD4:
-      case VgT_DifD2:
-      case VgT_DifD1:
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         break;
-
-      /* ImproveAND(value, tags) = value OR tags.
-	 Defined (0) value 0s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveAND4_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-      case VgT_ImproveAND2_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveAND1_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      /* ImproveOR(value, tags) = (not value) OR tags.
-	 Defined (0) value 1s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         To avoid trashing value, this is implemented (re de Morgan) as
-               not (value AND (not tags))
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveOR4_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         break;
-      case VgT_ImproveOR2_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveOR1_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG2_op");
-   }
-}
-
 /*----------------------------------------------------*/
 /*--- Generate code for a single UInstr.           ---*/
 /*----------------------------------------------------*/
@@ -2190,10 +1947,13 @@
    return (u->flags_w != FlagsEmpty); 
 }
 
-static void emitUInstr ( Int i, UInstr* u )
+static void emitUInstr ( UCodeBlock* cb, Int i, RRegSet regs_live_before )
 {
+   Int     old_emitted_code_used;
+   UInstr* u = &cb->instrs[i];
+
    if (dis)
-      VG_(ppUInstr)(i, u);
+      VG_(ppUInstrWithRegs)(i, u);
 
 #  if 0
    if (0&& VG_(translations_done) >= 600) {
@@ -2204,13 +1964,79 @@
    }
 #  endif
 
+   old_emitted_code_used = emitted_code_used;
+   
    switch (u->opcode) {
-
       case NOP: case CALLM_S: case CALLM_E: break;
 
       case INCEIP: {
-         vg_assert(u->tag1 == Lit16);
-         emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+        /* Note: Redundant INCEIP merging.  A potentially useful
+           performance enhancementa, but currently disabled.  Reason
+           is that it needs a surefire way to know if a UInstr might
+           give rise to a stack snapshot being taken.  The logic below
+           is correct (hopefully ...) for the core UInstrs, but is
+           incorrect if a skin has its own UInstrs, since the logic
+           currently assumes that none of them can cause a stack
+           trace, and that's just wrong.  Note this isn't
+           mission-critical -- the system still functions -- but will
+           cause incorrect source locations in some situations,
+           specifically for the memcheck skin.  This is known to
+           confuse programmers, understandable.  */
+#        if 0
+         Bool    can_skip;
+         Int     j;
+
+         /* Scan forwards to see if this INCEIP dominates (in the
+            technical sense) a later one, AND there are no CCALLs in
+            between.  If so, skip this one and instead add its count
+            with the later one. */
+         can_skip = True;
+	 j = i+1;
+         while (True) {
+            if (cb->instrs[j].opcode == CCALL 
+                || cb->instrs[j].opcode == CALLM) {
+               /* CCALL -- we can't skip this INCEIP. */
+               can_skip = False; 
+               break;
+            }
+            if (cb->instrs[j].opcode == INCEIP) {
+               /* Another INCEIP.  Check that the sum will fit. */
+               if (cb->instrs[i].val1 + cb->instrs[j].val1 > 127)
+                  can_skip = False;
+               break;
+            }
+            if (cb->instrs[j].opcode == JMP || cb->instrs[j].opcode == JIFZ) {
+               /* Execution is not guaranteed to get beyond this
+                  point.  Give up. */
+               can_skip = False; 
+               break;
+            }
+            j++;
+            /* Assertion should hold because all blocks should end in an
+               unconditional JMP, so the above test should get us out of
+               the loop at the end of a block. */
+            vg_assert(j < cb->used);
+         }
+         if (can_skip) {
+            /* yay!  Accumulate the delta into the next INCEIP. */
+            // VG_(printf)("skip INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(j > i);
+            vg_assert(j < cb->used);
+            vg_assert(cb->instrs[j].opcode == INCEIP);
+            vg_assert(cb->instrs[i].opcode == INCEIP);
+            vg_assert(cb->instrs[j].tag1 == Lit16);
+            vg_assert(cb->instrs[i].tag1 == Lit16);
+            cb->instrs[j].val1 += cb->instrs[i].val1;
+            /* do nothing now */
+         } else 
+#        endif
+
+         {
+            /* no, we really have to do this, alas */
+            // VG_(printf)("  do INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(u->tag1 == Lit16);
+            emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+         }
          break;
       }
 
@@ -2240,41 +2066,10 @@
          break;
       }
 
-      case SETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         synth_SETV ( u->size, u->val1 );
-         break;
-      }
-
-      case STOREV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == RealReg);
-         synth_STOREV ( u->size, u->tag1, 
-                                 u->tag1==Literal ? u->lit32 : u->val1, 
-                                 u->val2 );
-         break;
-      }
-
       case STORE: {
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
-	 /* No longer possible, but retained for illustrative purposes.
-         if (u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-	 */
-         break;
-      }
-
-      case LOADV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         if (0 && VG_(clo_instrument))
-            emit_AMD_prefetch_reg ( u->val1 );
-         synth_LOADV ( u->size, u->val1, u->val2 );
          break;
       }
 
@@ -2285,47 +2080,6 @@
          break;
       }
 
-      case TESTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg);
-         synth_TESTV(u->size, u->tag1, u->val1);
-         break;
-      }
-
-      case GETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == ArchReg);
-         vg_assert(u->tag2 == RealReg);
-         synth_GETV(u->size, u->val1, u->val2);
-         break;
-      }
-
-      case GETVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_GETVF(u->val1);
-         break;
-      }
-
-      case PUTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == ArchReg);
-         synth_PUTV(u->size, u->tag1, 
-                             u->tag1==Literal ? u->lit32 : u->val1, 
-                             u->val2 );
-         break;
-      }
-
-      case PUTVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_PUTVF(u->val1);
-         break;
-      }
-
       case GET: {
          vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo);
          vg_assert(u->tag2 == RealReg);
@@ -2344,15 +2098,23 @@
          if (u->tag2 == ArchReg 
              && u->val2 == R_ESP
              && u->size == 4
-             && VG_(clo_instrument)) {
-            synth_handle_esp_assignment ( u->val1 );
+             && (VG_(track_events).new_mem_stack         || 
+                 VG_(track_events).new_mem_stack_aligned ||
+                 VG_(track_events).die_mem_stack         ||
+                 VG_(track_events).die_mem_stack_aligned ||
+                 VG_(track_events).post_mem_write))
+         {
+            synth_handle_esp_assignment ( i, u->val1, regs_live_before,
+                                          u->regs_live_after );
 	 }
-         synth_mov_reg_offregmem ( 
-            u->size, 
-            u->val1, 
-            spillOrArchOffset( u->size, u->tag2, u->val2 ),
-            R_EBP
-         );
+         else {
+            synth_mov_reg_offregmem ( 
+               u->size, 
+               u->val1, 
+               spillOrArchOffset( u->size, u->tag2, u->val2 ),
+               R_EBP
+            );
+         }
          break;
       }
 
@@ -2436,7 +2198,6 @@
       case RCR:
       case RCL:
          vg_assert(u->tag2 == RealReg);
-         vg_assert(! readFlagUse ( u ));
          switch (u->tag1) {
             case Literal: synth_shiftop_lit_reg (
                              readFlagUse(u), writeFlagUse(u),
@@ -2515,55 +2276,16 @@
          synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 );
          break;
 
-      case TAG1:
-         synth_TAG1_op ( u->val3, u->val1 );
-         break;
-
-      case TAG2:
-         if (u->val3 != VgT_DebugFn) {
-            synth_TAG2_op ( u->val3, u->val1, u->val2 );
-         } else {
-            /* Assume a call to VgT_DebugFn passing both args
-               and placing the result back in the second. */
-            Int j, k;
-            /* u->val2 is the reg into which the result is written.  So
-               don't save/restore it.  And it can be used at a temp for
-               the call target, too.  Since %eax is used for the return
-               value from the C procedure, it is preserved only by
-               virtue of not being mentioned as a VG_CALLEE_SAVED reg. */
-            for (k = 0; k < VG_MAX_REALREGS; k++) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_pushv_reg ( 4, j );
-            }
-            emit_pushv_reg(4, u->val2);
-            emit_pushv_reg(4, u->val1);
-            emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 );
-            emit_call_reg ( u->val2 );
-            if (u->val2 != R_EAX)
-               emit_movv_reg_reg ( 4, R_EAX, u->val2 );
-            /* nuke args */
-            emit_add_lit_to_esp(8);
-            for (k = VG_MAX_REALREGS-1; k >= 0; k--) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_popv_reg ( 4, j );
-            }
-         }
-         break;
-
       case PUSH:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_pushv_reg ( 4, u->val1 );
+         VG_(emit_pushv_reg) ( 4, u->val1 );
          break;
 
       case POP:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_popv_reg ( 4, u->val1 );
+         VG_(emit_popv_reg) ( 4, u->val1 );
          break;
 
       case CALLM:
@@ -2572,35 +2294,34 @@
          vg_assert(u->size == 0);
          if (readFlagUse ( u )) 
             emit_get_eflags();
-         synth_call_baseBlock_method ( False, u->val1 );
+         VG_(synth_call) ( False, u->val1 );
          if (writeFlagUse ( u )) 
             emit_put_eflags();
          break;
 
-      case CCALL_1_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == NoValue);
+      case CCALL: {
+         /* Lazy: copy all three vals;  synth_ccall ignores any unnecessary
+            ones. */
+         UInt argv[]  = { u->val1, u->val2, u->val3 };
+         UInt tagv[]  = { RealReg, RealReg, RealReg };
+         UInt ret_reg = ( u->has_ret_val ? u->val3 : INVALID_REALREG );
+
+         if (u->argc >= 1)                   vg_assert(u->tag1 == RealReg);
+         else                                vg_assert(u->tag1 == NoValue);
+         if (u->argc >= 2)                   vg_assert(u->tag2 == RealReg);
+         else                                vg_assert(u->tag2 == NoValue);
+         if (u->argc == 3 || u->has_ret_val) vg_assert(u->tag3 == RealReg);
+         else                                vg_assert(u->tag3 == NoValue);
          vg_assert(u->size == 0);
 
-         synth_ccall_saveRegs();
-         synth_ccall_pushOneArg ( u->val1 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 4 );
+         VG_(synth_ccall) ( u->lit32, u->argc, u->regparms_n, argv, tagv,
+                            ret_reg, regs_live_before, u->regs_live_after );
          break;
-
-      case CCALL_2_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         vg_assert(u->size == 0);
-
-         synth_ccall_saveRegs();
-         synth_ccall_pushTwoArgs ( u->val1, u->val2 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 8 );
-         break;
-
+      }
       case CLEAR:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == NoValue);
-         emit_add_lit_to_esp ( u->val1 );
+         VG_(emit_add_lit_to_esp) ( u->val1 );
          break;
 
       case CC2VAL:
@@ -2610,23 +2331,13 @@
          synth_setb_reg ( u->val1, u->cond );
          break;
 
-      /* We assume that writes to memory done by FPU_Ws are not going
-         to be used to create new code, so there's no orig-code-write
-         checks done by default. */
       case FPU_R: 
       case FPU_W:         
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == RealReg);
-         if (VG_(clo_instrument))
-            synth_fpu_mem_check_actions ( 
-               u->opcode==FPU_W, u->size, u->val2 );
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
-         /* No longer possible, but retained for illustrative purposes.
-         if (u->opcode == FPU_W && u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-         */
          break;
 
       case FPU:
@@ -2641,11 +2352,22 @@
          break;
 
       default: 
-         VG_(printf)("emitUInstr: unhandled insn:\n");
-         VG_(ppUInstr)(0,u);
-         VG_(panic)("emitUInstr: unimplemented opcode");
+         if (VG_(needs).extended_UCode)
+            SK_(emitExtUInstr)(u, regs_live_before);
+         else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled opcode: %u.  Perhaps "
+                        "VG_(needs).extended_UCode should be set?\n",
+                        u->opcode);
+            VG_(ppUInstr)(0,u);
+            VG_(panic)("emitUInstr: unimplemented opcode");
+         }
    }
 
+   /* Update UInstr histogram */
+   vg_assert(u->opcode < 100);
+   histogram[u->opcode].counts++;
+   histogram[u->opcode].size += (emitted_code_used - old_emitted_code_used);
 }
 
 
@@ -2654,67 +2376,39 @@
 UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes )
 {
    Int i;
+   UChar regs_live_before = 0;   /* No regs live at BB start */
+   
    emitted_code_used = 0;
    emitted_code_size = 500; /* reasonable initial size */
-   emitted_code = VG_(jitmalloc)(emitted_code_size);
+   emitted_code = VG_(arena_malloc)(VG_AR_JITTER, emitted_code_size);
 
-   if (dis) VG_(printf)("Generated code:\n");
+   if (dis) VG_(printf)("Generated x86 code:\n");
 
    for (i = 0; i < cb->used; i++) {
+      UInstr* u = &cb->instrs[i];
       if (cb->instrs[i].opcode != NOP) {
-         UInstr* u = &cb->instrs[i];
-#        if 1
+
          /* Check on the sanity of this insn. */
-         Bool sane = VG_(saneUInstr)( False, u );
+         Bool sane = VG_(saneUInstr)( False, False, u );
          if (!sane) {
             VG_(printf)("\ninsane instruction\n");
-            VG_(ppUInstr)( i, u );
+            VG_(ppUInstr)( i, u );
 	 }
          vg_assert(sane);
-#        endif
-#        if 0
-         /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking.
-            Requires a suitable definition of vg_DebugFn. */
-	 if (u->opcode == TAG1) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            emitUInstr( i, &t1 );
-	 }
-	 if (u->opcode == TAG2) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            vg_assert(u->tag2 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 
-                || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 
-                || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4)
-               emitUInstr( i, &t1 );
-            t1.val1 = t1.val2 = u->val2;
-            emitUInstr( i, &t1 );
-	 }
-#        endif
-         emitUInstr( i, u );
+         emitUInstr( cb, i, regs_live_before );
       }
+      regs_live_before = u->regs_live_after;
    }
+   if (dis) VG_(printf)("\n");
 
    /* Returns a pointer to the emitted code.  This will have to be
-      copied by the caller into the translation cache, and then freed
-      using VG_(jitfree). */
+      copied by the caller into the translation cache, and then freed */
    *nbytes = emitted_code_used;
    return emitted_code;
 }
 
+#undef dis
+
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_from_ucode.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 8262737..2315da4 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -86,36 +86,6 @@
    and the incoming parameters can be modified, to return results.
 */
 
-
-.global VG_(helper_value_check0_fail)
-VG_(helper_value_check0_fail):
-	pushal
-	call	VG_(helperc_value_check0_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check1_fail)
-VG_(helper_value_check1_fail):
-	pushal
-	call	VG_(helperc_value_check1_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check2_fail)
-VG_(helper_value_check2_fail):
-	pushal
-	call	VG_(helperc_value_check2_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check4_fail)
-VG_(helper_value_check4_fail):
-	pushal
-	call	VG_(helperc_value_check4_fail)
-	popal
-	ret
-
-
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index 74e1016..edf7aef 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- A header file for all parts of Valgrind.                     ---*/
+/*--- A header file for all private parts of Valgrind's core.      ---*/
 /*--- Include no other!                                            ---*/
 /*---                                                 vg_include.h ---*/
 /*--------------------------------------------------------------------*/
@@ -27,17 +27,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_INCLUDE_H
 #define __VG_INCLUDE_H
 
-
-#include <stdarg.h>       /* ANSI varargs stuff  */
-#include <setjmp.h>       /* for jmp_buf         */
-
-
 /* ---------------------------------------------------------------------
    Where to send bug reports to.
    ------------------------------------------------------------------ */
@@ -52,21 +47,9 @@
 
 #include "vg_constants.h"
 
-
-/* Set to 1 to enable time profiling.  Since this uses SIGPROF, we
-   don't want this permanently enabled -- only for profiling
-   builds. */
-#if 0
-#  define VG_PROFILE
-#endif
-
-
-/* Total number of integer registers available for allocation.  That's
-   all of them except %esp, %edi and %ebp.  %edi is a general spare
-   temporary.  %ebp permanently points at VG_(baseBlock).  Note that
-   it's important that this tie in with what rankToRealRegNo() says.
-   DO NOT CHANGE THIS VALUE FROM 5. !  */
-#define VG_MAX_REALREGS 5
+/* All stuff visible to core and skins goes in vg_skin.h.  Things visible
+ * to core but private to skins go here. */
+#include "vg_skin.h"
 
 /* Total number of spill slots available for allocation, if a TempReg
    doesn't make it into a RealReg.  Just bomb the entire system if
@@ -111,10 +94,6 @@
    errors at all.  Counterpart to M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN. */
 #define M_VG_COLLECT_NO_ERRORS_AFTER_FOUND 30000
 
-/* These many bytes below %ESP are considered addressible if we're
-   doing the --workaround-gcc296-bugs hack. */
-#define VG_GCC296_BUG_STACK_SLOP 1024
-
 /* The maximum number of calls we're prepared to save in a
    backtrace. */
 #define VG_DEEPEST_BACKTRACE 50
@@ -132,17 +111,6 @@
    give finer interleaving but much increased scheduling overheads. */
 #define VG_SCHEDULING_QUANTUM   50000
 
-/* The maximum number of pthreads that we support.  This is
-   deliberately not very high since our implementation of some of the
-   scheduler algorithms is surely O(N) in the number of threads, since
-   that's simple, at least.  And (in practice) we hope that most
-   programs do not need many threads. */
-#define VG_N_THREADS 50
-
-/* Maximum number of pthread keys available.  Again, we start low until
-   the need for a higher number presents itself. */
-#define VG_N_THREAD_KEYS 50
-
 /* Number of file descriptors that can simultaneously be waited on for
    I/O to complete.  Perhaps this should be the same as VG_N_THREADS
    (surely a thread can't wait on more than one fd at once?.  Who
@@ -165,97 +133,43 @@
 /* Number of entries in each thread's fork-handler stack. */
 #define VG_N_FORKHANDLERSTACK 2
 
+/* Max number of callers for context in a suppression. */
+#define VG_N_SUPP_CALLERS  4
+   
 
 /* ---------------------------------------------------------------------
    Basic types
    ------------------------------------------------------------------ */
 
-typedef unsigned char          UChar;
-typedef unsigned short         UShort;
-typedef unsigned int           UInt;
-typedef unsigned long long int ULong;
-
-typedef signed char          Char;
-typedef signed short         Short;
-typedef signed int           Int;
-typedef signed long long int Long;
-
-typedef unsigned int Addr;
-
-typedef unsigned char Bool;
-#define False ((Bool)0)
-#define True ((Bool)1)
-
-#define mycat_wrk(aaa,bbb) aaa##bbb
-#define mycat(aaa,bbb) mycat_wrk(aaa,bbb)
-
 /* Just pray that gcc's constant folding works properly ... */
 #define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0)               \
    ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4)  \
      | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0))
 
-/* For cache simulation */
-typedef struct { 
-    int size;       /* bytes */
-    int assoc;
-    int line_size;  /* bytes */
-} cache_t;
-
-#define UNDEFINED_CACHE     ((cache_t) { -1, -1, -1 })
-
-/* ---------------------------------------------------------------------
-   Now the basic types are set up, we can haul in the kernel-interface
-   definitions.
-   ------------------------------------------------------------------ */
-
-#include "./vg_kerneliface.h"
-
-
 /* ---------------------------------------------------------------------
    Command-line-settable options
    ------------------------------------------------------------------ */
 
-#define VG_CLO_SMC_NONE 0
-#define VG_CLO_SMC_SOME 1
-#define VG_CLO_SMC_ALL  2
-
 #define VG_CLO_MAX_SFILES 10
 
 /* Should we stop collecting errors if too many appear?  default: YES */
 extern Bool  VG_(clo_error_limit);
-/* Shall we V-check addrs (they are always A checked too): default: YES */
-extern Bool  VG_(clo_check_addrVs);
 /* Enquire about whether to attach to GDB at errors?   default: NO */
 extern Bool  VG_(clo_GDB_attach);
 /* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */
 extern Int   VG_(sanity_level);
-/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */
-extern Int   VG_(clo_verbosity);
 /* Automatically attempt to demangle C++ names?  default: YES */
 extern Bool  VG_(clo_demangle);
-/* Do leak check at exit?  default: NO */
-extern Bool  VG_(clo_leak_check);
-/* In leak check, show reachable-but-not-freed blocks?  default: NO */
-extern Bool  VG_(clo_show_reachable);
-/* How closely should we compare ExeContexts in leak records? default: 2 */
-extern Int   VG_(clo_leak_resolution);
 /* Round malloc sizes upwards to integral number of words? default:
    NO */
 extern Bool  VG_(clo_sloppy_malloc);
 /* Minimum alignment in functions that don't specify alignment explicitly.
    default: 0, i.e. use default of the machine (== 4) */
 extern Int   VG_(clo_alignment);
-/* Allow loads from partially-valid addresses?  default: YES */
-extern Bool  VG_(clo_partial_loads_ok);
 /* Simulate child processes? default: NO */
 extern Bool  VG_(clo_trace_children);
 /* The file id on which we send all messages.  default: 2 (stderr). */
 extern Int   VG_(clo_logfile_fd);
-/* Max volume of the freed blocks queue. */
-extern Int   VG_(clo_freelist_vol);
-/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
-   default: NO */
-extern Bool  VG_(clo_workaround_gcc296_bugs);
 
 /* The number of suppression files specified. */
 extern Int   VG_(clo_n_suppressions);
@@ -266,20 +180,8 @@
 extern Bool  VG_(clo_single_step);
 /* Code improvement?  default: YES */
 extern Bool  VG_(clo_optimise);
-/* Memory-check instrumentation?  default: YES */
-extern Bool  VG_(clo_instrument);
-/* DEBUG: clean up instrumented code?  default: YES */
-extern Bool  VG_(clo_cleanup);
-/* Cache simulation instrumentation?  default: NO */
-extern Bool  VG_(clo_cachesim);
-/* I1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_I1_cache);
-/* D1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_D1_cache);
-/* L2 cache configuration.  default: undefined */
-extern cache_t VG_(clo_L2_cache);
-/* SMC write checks?  default: SOME (1,2,4 byte movs to mem) */
-extern Int   VG_(clo_smc_check);
+/* DEBUG: print generated code?  default: 00000 ( == NO ) */
+extern Bool  VG_(clo_trace_codegen);
 /* DEBUG: print system calls?  default: NO */
 extern Bool  VG_(clo_trace_syscalls);
 /* DEBUG: print signal details?  default: NO */
@@ -308,78 +210,35 @@
    Debugging and profiling stuff
    ------------------------------------------------------------------ */
 
+/* Change to 1 to get more accurate but more expensive core profiling. */
+#if 0
+#  define VGP_ACCURATE_PROFILING
+#endif
+
 /* No, really.  I _am_ that strange. */
 #define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn)
 
-/* Tools for building messages from multiple parts. */
-typedef
-   enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg }
-   VgMsgKind;
-
-extern void VG_(start_msg)  ( VgMsgKind kind );
-extern void VG_(add_to_msg) ( Char* format, ... );
-extern void VG_(end_msg)    ( void );
-
-/* Send a simple, single-part message. */
-extern void VG_(message)    ( VgMsgKind kind, Char* format, ... );
-
 /* Create a logfile into which messages can be dumped. */
 extern void VG_(startup_logging) ( void );
-extern void VG_(shutdown_logging) ( void );
-
-
-/* Profiling stuff */
-#ifdef VG_PROFILE
-
-#define VGP_M_STACK 10
-
-#define VGP_M_CCS 26  /* == the # of elems in VGP_LIST */
-#define VGP_LIST \
-   VGP_PAIR(VgpUnc=0,      "unclassified"),           \
-   VGP_PAIR(VgpRun,        "running"),                \
-   VGP_PAIR(VgpSched,      "scheduler"),              \
-   VGP_PAIR(VgpMalloc,     "low-lev malloc/free"),    \
-   VGP_PAIR(VgpCliMalloc,  "client  malloc/free"),    \
-   VGP_PAIR(VgpTranslate,  "translate-main"),         \
-   VGP_PAIR(VgpToUCode,    "to-ucode"),               \
-   VGP_PAIR(VgpFromUcode,  "from-ucode"),             \
-   VGP_PAIR(VgpImprove,    "improve"),                \
-   VGP_PAIR(VgpInstrument, "instrument"),             \
-   VGP_PAIR(VgpCleanup,    "cleanup"),                \
-   VGP_PAIR(VgpRegAlloc,   "reg-alloc"),              \
-   VGP_PAIR(VgpDoLRU,      "do-lru"),                 \
-   VGP_PAIR(VgpSlowFindT,  "slow-search-transtab"),   \
-   VGP_PAIR(VgpInitAudit,  "init-mem-audit"),         \
-   VGP_PAIR(VgpExeContext, "exe-context"),            \
-   VGP_PAIR(VgpReadSyms,   "read-syms"),              \
-   VGP_PAIR(VgpAddToT,     "add-to-transtab"),        \
-   VGP_PAIR(VgpSARP,       "set-addr-range-perms"),   \
-   VGP_PAIR(VgpSyscall,    "syscall wrapper"),        \
-   VGP_PAIR(VgpCacheInstrument, "cache instrument"),  \
-   VGP_PAIR(VgpCacheGetBBCC,"cache get BBCC"),        \
-   VGP_PAIR(VgpCacheSimulate, "cache simulate"),      \
-   VGP_PAIR(VgpCacheDump,  "cache stats dump"),       \
-   VGP_PAIR(VgpSpare1,     "spare 1"),                \
-   VGP_PAIR(VgpSpare2,     "spare 2")
-
-#define VGP_PAIR(enumname,str) enumname
-typedef enum { VGP_LIST } VgpCC;
-#undef VGP_PAIR
+extern void VG_(shutdown_logging)( void );
 
 extern void VGP_(init_profiling) ( void );
 extern void VGP_(done_profiling) ( void );
-extern void VGP_(pushcc) ( VgpCC );
-extern void VGP_(popcc) ( void );
 
-#define VGP_PUSHCC(cc) VGP_(pushcc)(cc)
-#define VGP_POPCC      VGP_(popcc)()
+#undef  VGP_PUSHCC
+#undef  VGP_POPCC
+#define VGP_PUSHCC(x)   do { if (VG_(clo_profile)) VGP_(pushcc)(x); } while (0)
+#define VGP_POPCC(x)    do { if (VG_(clo_profile)) VGP_(popcc)(x); } while (0)
 
+/* Use this for ones that happen a lot and thus we don't want to put in
+   all the time, eg. for %esp assignment. */
+#ifdef VGP_ACCURATE_PROFILING
+#  define VGP_MAYBE_PUSHCC(x)   do { if (VG_(clo_profile)) VGP_(pushcc)(x); } while (0)
+#  define VGP_MAYBE_POPCC(x)    do { if (VG_(clo_profile)) VGP_(popcc)(x); } while (0)
 #else
-
-#define VGP_PUSHCC(cc) /* */
-#define VGP_POPCC      /* */
-
-#endif /* VG_PROFILE */
+#  define VGP_MAYBE_PUSHCC(x)
+#  define VGP_MAYBE_POPCC(x)
+#endif
 
 
 /* ---------------------------------------------------------------------
@@ -387,37 +246,40 @@
    ------------------------------------------------------------------ */
 
 /* Allocation arenas.  
+      CORE      is for the core's general use.
+      SKIN      is for the skin to use (and the only one it uses).
       SYMTAB    is for Valgrind's symbol table storage.
+      JITTER    is for small storage during translation.
       CLIENT    is for the client's mallocs/frees.
       DEMANGLE  is for the C++ demangler.
       EXECTXT   is for storing ExeContexts.
-      ERRCTXT   is for storing ErrContexts.
-      PRIVATE   is for Valgrind general stuff.
+      ERRORS    is for storing CoreErrors.
       TRANSIENT is for very short-term use.  It should be empty
                 in between uses.
-   When adding a new arena, remember also to add it
-   to ensure_mm_init(). 
+   When adding a new arena, remember also to add it to ensure_mm_init(). 
 */
 typedef Int ArenaId;
 
-#define VG_N_ARENAS 7
+#define VG_N_ARENAS 9
 
-#define VG_AR_PRIVATE   0    /* :: ArenaId */
-#define VG_AR_SYMTAB    1    /* :: ArenaId */
-#define VG_AR_CLIENT    2    /* :: ArenaId */
-#define VG_AR_DEMANGLE  3    /* :: ArenaId */
-#define VG_AR_EXECTXT   4    /* :: ArenaId */
-#define VG_AR_ERRCTXT   5    /* :: ArenaId */
-#define VG_AR_TRANSIENT 6    /* :: ArenaId */
+#define VG_AR_CORE      0    /* :: ArenaId */
+#define VG_AR_SKIN      1    /* :: ArenaId */
+#define VG_AR_SYMTAB    2    /* :: ArenaId */
+#define VG_AR_JITTER    3    /* :: ArenaId */
+#define VG_AR_CLIENT    4    /* :: ArenaId */
+#define VG_AR_DEMANGLE  5    /* :: ArenaId */
+#define VG_AR_EXECTXT   6    /* :: ArenaId */
+#define VG_AR_ERRORS    7    /* :: ArenaId */
+#define VG_AR_TRANSIENT 8    /* :: ArenaId */
 
-extern void* VG_(malloc)  ( ArenaId arena, Int nbytes );
-extern void  VG_(free)    ( ArenaId arena, void* ptr );
-extern void* VG_(calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
-extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size );
-extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, 
+extern void* VG_(arena_malloc)  ( ArenaId arena, Int nbytes );
+extern void  VG_(arena_free)    ( ArenaId arena, void* ptr );
+extern void* VG_(arena_calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
+extern void* VG_(arena_realloc) ( ArenaId arena, void* ptr, Int alignment,
+                                  Int size );
+extern void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, 
                                                 Int req_pszB );
 
-extern void  VG_(mallocSanityCheckArena) ( ArenaId arena );
 extern void  VG_(mallocSanityCheckAll)   ( void );
 
 extern void  VG_(show_all_arena_stats) ( void );
@@ -433,13 +295,13 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_clientfuns.c
+   Exports of vg_clientfuncs.c
    ------------------------------------------------------------------ */
 
 /* This doesn't export code or data that valgrind.so needs to link
    against.  However, the scheduler does need to know the following
    request codes.  A few, publically-visible, request codes are also
-   defined in valgrind.h. */
+   defined in valgrind.h, and similar headers for some skins. */
 
 #define VG_USERREQ__MALLOC              0x2001
 #define VG_USERREQ__BUILTIN_NEW         0x2002
@@ -552,16 +414,6 @@
    Exports of vg_scheduler.c
    ------------------------------------------------------------------ */
 
-/* ThreadIds are simply indices into the vg_threads[] array. */
-typedef 
-   UInt 
-   ThreadId;
-
-/* Special magic value for an invalid ThreadId.  It corresponds to
-   LinuxThreads using zero as the initial value for
-   pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */
-#define VG_INVALID_THREADID ((ThreadId)(0))
-
 typedef
    enum { 
       VgTs_Empty,      /* this slot is not in use */
@@ -594,140 +446,138 @@
    ForkHandlerEntry;
 
 
-typedef
-   struct {
-      /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
-         The thread identity is simply the index in vg_threads[].
-         ThreadId == 1 is the root thread and has the special property
-         that we don't try and allocate or deallocate its stack.  For
-         convenience of generating error message, we also put the
-         ThreadId in this tid field, but be aware that it should
-         ALWAYS == the index in vg_threads[]. */
-      ThreadId tid;
+struct _ThreadState {
+   /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
+      The thread identity is simply the index in vg_threads[].
+      ThreadId == 1 is the root thread and has the special property
+      that we don't try and allocate or deallocate its stack.  For
+      convenience of generating error message, we also put the
+      ThreadId in this tid field, but be aware that it should
+      ALWAYS == the index in vg_threads[]. */
+   ThreadId tid;
 
-      /* Current scheduling status. 
+   /* Current scheduling status. 
 
-         Complications: whenever this is set to VgTs_WaitMX, you
-         should also set .m_edx to whatever the required return value
-         is for pthread_mutex_lock / pthread_cond_timedwait for when
-         the mutex finally gets unblocked. */
-      ThreadStatus status;
+      Complications: whenever this is set to VgTs_WaitMX, you
+      should also set .m_edx to whatever the required return value
+      is for pthread_mutex_lock / pthread_cond_timedwait for when
+      the mutex finally gets unblocked. */
+   ThreadStatus status;
 
-      /* When .status == WaitMX, points to the mutex I am waiting for.
-         When .status == WaitCV, points to the mutex associated with
-         the condition variable indicated by the .associated_cv field.
-         In all other cases, should be NULL. */
-      void* /* pthread_mutex_t* */ associated_mx;
+   /* When .status == WaitMX, points to the mutex I am waiting for.
+      When .status == WaitCV, points to the mutex associated with
+      the condition variable indicated by the .associated_cv field.
+      In all other cases, should be NULL. */
+   void* /*pthread_mutex_t* */ associated_mx;
 
-      /* When .status == WaitCV, points to the condition variable I am
-         waiting for.  In all other cases, should be NULL. */
-      void* /* pthread_cond_t* */ associated_cv;
+   /* When .status == WaitCV, points to the condition variable I am
+      waiting for.  In all other cases, should be NULL. */
+   void* /*pthread_cond_t* */ associated_cv;
 
-      /* If VgTs_Sleeping, this is when we should wake up, measured in
-         milliseconds as supplied by VG_(read_millisecond_counter). 
- 
-         If VgTs_WaitCV, this indicates the time at which
-         pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
-         this means infinitely far in the future, viz,
-         pthread_cond_wait. */
-      UInt awaken_at;
+   /* If VgTs_Sleeping, this is when we should wake up, measured in
+      milliseconds as supplied by VG_(read_millisecond_counter). 
 
-      /* If VgTs_WaitJoiner, return value, as generated by joinees. */
-      void* joinee_retval;
+      If VgTs_WaitCV, this indicates the time at which
+      pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
+      this means infinitely far in the future, viz,
+      pthread_cond_wait. */
+   UInt awaken_at;
 
-      /* If VgTs_WaitJoinee, place to copy the return value to, and
-         the identity of the thread we're waiting for. */
-      void**   joiner_thread_return;
-      ThreadId joiner_jee_tid;      
+   /* If VgTs_WaitJoiner, return value, as generated by joinees. */
+   void* joinee_retval;
 
-      /* Whether or not detached. */
-      Bool detached;
+   /* If VgTs_WaitJoinee, place to copy the return value to, and
+      the identity of the thread we're waiting for. */
+   void**   joiner_thread_return;
+   ThreadId joiner_jee_tid;      
 
-      /* Cancelability state and type. */
-      Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
-      Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
-     
-      /* Pointer to fn to call to do cancellation.  Indicates whether
-         or not cancellation is pending.  If NULL, not pending.  Else
-         should be &thread_exit_wrapper(), indicating that
-         cancallation is pending. */
-      void (*cancel_pend)(void*);
+   /* Whether or not detached. */
+   Bool detached;
 
-      /* The cleanup stack. */
-      Int          custack_used;
-      CleanupEntry custack[VG_N_CLEANUPSTACK];
+   /* Cancelability state and type. */
+   Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
+   Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
+  
+   /* Pointer to fn to call to do cancellation.  Indicates whether
+      or not cancellation is pending.  If NULL, not pending.  Else
+      should be &thread_exit_wrapper(), indicating that
+      cancellation is pending. */
+   void (*cancel_pend)(void*);
 
-      /* thread-specific data */
-      void* specifics[VG_N_THREAD_KEYS];
+   /* The cleanup stack. */
+   Int          custack_used;
+   CleanupEntry custack[VG_N_CLEANUPSTACK];
 
-      /* This thread's blocked-signals mask.  Semantics is that for a
-         signal to be delivered to this thread, the signal must not be
-         blocked by either the process-wide signal mask nor by this
-         one.  So, if this thread is prepared to handle any signal that
-         the process as a whole is prepared to handle, this mask should
-         be made empty -- and that it is its default, starting
-         state. */
-      vki_ksigset_t sig_mask;
+   /* thread-specific data */
+   void* specifics[VG_N_THREAD_KEYS];
 
-      /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
-         is the set of signals for which we are sigwait()ing. */
-      vki_ksigset_t sigs_waited_for;
+   /* This thread's blocked-signals mask.  Semantics is that for a
+      signal to be delivered to this thread, the signal must not be
+      blocked by either the process-wide signal mask nor by this
+      one.  So, if this thread is prepared to handle any signal that
+      the process as a whole is prepared to handle, this mask should
+      be made empty -- and that it is its default, starting
+      state. */
+   vki_ksigset_t sig_mask;
 
-      /* Counts the number of times a signal handler for this thread
-         has returned.  This makes it easy to implement pause(), by
-         polling this value, of course interspersed with nanosleeps,
-         and waiting till it changes. */
-      UInt n_signals_returned;
+   /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
+      is the set of signals for which we are sigwait()ing. */
+   vki_ksigset_t sigs_waited_for;
 
-      /* Stacks.  When a thread slot is freed, we don't deallocate its
-         stack; we just leave it lying around for the next use of the
-         slot.  If the next use of the slot requires a larger stack,
-         only then is the old one deallocated and a new one
-         allocated. 
- 
-         For the main thread (threadid == 0), this mechanism doesn't
-         apply.  We don't know the size of the stack since we didn't
-         allocate it, and furthermore we never reallocate it. */
+   /* Counts the number of times a signal handler for this thread
+      has returned.  This makes it easy to implement pause(), by
+      polling this value, of course interspersed with nanosleeps,
+      and waiting till it changes. */
+   UInt n_signals_returned;
 
-      /* The allocated size of this thread's stack (permanently zero
-         if this is ThreadId == 0, since we didn't allocate its stack) */
-      UInt stack_size;
+   /* Stacks.  When a thread slot is freed, we don't deallocate its
+      stack; we just leave it lying around for the next use of the
+      slot.  If the next use of the slot requires a larger stack,
+      only then is the old one deallocated and a new one
+      allocated. 
 
-      /* Address of the lowest word in this thread's stack.  NULL means
-         not allocated yet.
-      */
-      Addr stack_base;
+      For the main thread (threadid == 0), this mechanism doesn't
+      apply.  We don't know the size of the stack since we didn't
+      allocate it, and furthermore we never reallocate it. */
 
-     /* Address of the highest legitimate word in this stack.  This is
-        used for error messages only -- not critical for execution
-        correctness.  Is is set for all stacks, specifically including
-        ThreadId == 0 (the main thread). */
-      Addr stack_highest_word;
+   /* The allocated size of this thread's stack (permanently zero
+      if this is ThreadId == 0, since we didn't allocate its stack) */
+   UInt stack_size;
 
-      /* Saved machine context. */
-      UInt m_eax;
-      UInt m_ebx;
-      UInt m_ecx;
-      UInt m_edx;
-      UInt m_esi;
-      UInt m_edi;
-      UInt m_ebp;
-      UInt m_esp;
-      UInt m_eflags;
-      UInt m_eip;
-      UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+   /* Address of the lowest word in this thread's stack.  NULL means
+      not allocated yet.
+   */
+   Addr stack_base;
 
-      UInt sh_eax;
-      UInt sh_ebx;
-      UInt sh_ecx;
-      UInt sh_edx;
-      UInt sh_esi;
-      UInt sh_edi;
-      UInt sh_ebp;
-      UInt sh_esp;
-      UInt sh_eflags;
-   }
-   ThreadState;
+  /* Address of the highest legitimate word in this stack.  This is
+     used for error messages only -- not critical for execution
+     correctness.  It is set for all stacks, specifically including
+     ThreadId == 0 (the main thread). */
+   Addr stack_highest_word;
+
+   /* Saved machine context. */
+   UInt m_eax;
+   UInt m_ebx;
+   UInt m_ecx;
+   UInt m_edx;
+   UInt m_esi;
+   UInt m_edi;
+   UInt m_ebp;
+   UInt m_esp;
+   UInt m_eflags;
+   UInt m_eip;
+   UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+
+   UInt sh_eax;
+   UInt sh_ebx;
+   UInt sh_ecx;
+   UInt sh_edx;
+   UInt sh_esi;
+   UInt sh_edi;
+   UInt sh_ebp;
+   UInt sh_esp;
+   UInt sh_eflags;
+};
 
 
 /* The thread table. */
@@ -753,10 +603,6 @@
 /* Similarly ... */
 extern ThreadId VG_(get_current_tid) ( void );
 
-/* Which thread is this address in the stack of, if any?  Used for
-   error message generation. */
-extern ThreadId VG_(identify_stack_addr)( Addr a );
-
 /* Nuke all threads except tid. */
 extern void VG_(nuke_all_threads_except) ( ThreadId me );
 
@@ -795,12 +641,14 @@
    the initial stack, which we can't move, is allocated here.
    VG_(scheduler_init) checks this.  Andrea Archelangi's 2.4 kernels
    have been rumoured to start stacks at 0x80000000, so that too is
-   considered. It seems systems with longer uptimes tend to to use
-   stacks which start at 0x40000000 sometimes.  
-*/
+   considered.  It seems systems with longer uptimes tend to to use
+   stacks which start at 0x40000000 sometimes.  JRS 2002-Aug-21: I
+   also have reports of stacks starting at 0xE0000000.*/
+
 #define VG_STARTUP_STACK_BASE_1  (Addr)0xC0000000
 #define VG_STARTUP_STACK_BASE_2  (Addr)0x80000000
 #define VG_STARTUP_STACK_BASE_3  (Addr)0x40000000
+#define VG_STARTUP_STACK_BASE_4  (Addr)0xE0000000
 #define VG_STARTUP_STACK_SMALLERTHAN  0x100000 /* 1024k */
 
 #define VG_STACK_MATCHES_BASE(zzstack, zzbase)                 \
@@ -819,17 +667,24 @@
 #define VG_AR_CLIENT_STACKBASE_REDZONE_SZB \
    (VG_AR_CLIENT_STACKBASE_REDZONE_SZW * VKI_BYTES_PER_WORD)
 
+/* Junk to fill up a thread's shadow regs with when shadow regs aren't
+ * being used. */
+#define VG_UNUSED_SHADOW_REG_VALUE  0x27182818
+
+/* What we set a shadow register to when written by SET_EAX and similar
+ * things. */
+extern UInt VG_(written_shadow_reg);
 
 /* Write a value to the client's %EDX (request return value register)
    and set the shadow to indicate it is defined. */
-#define SET_EDX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_edx = (zzval);             \
-        VG_(threads)[zztid].sh_edx = VGM_WORD_VALID;     \
+#define SET_EDX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_edx = (zzval);                   \
+        VG_(threads)[zztid].sh_edx = VG_(written_shadow_reg);  \
    } while (0)
 
-#define SET_EAX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_eax = (zzval);             \
-        VG_(threads)[zztid].sh_eax = VGM_WORD_VALID;     \
+#define SET_EAX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_eax = (zzval);                   \
+        VG_(threads)[zztid].sh_eax = VG_(written_shadow_reg);  \
    } while (0)
 
 
@@ -875,87 +730,21 @@
    Exports of vg_mylibc.c
    ------------------------------------------------------------------ */
 
+__attribute__((noreturn))
+extern void VG_(skin_error) ( Char* s );
 
-#if !defined(NULL)
-#  define NULL ((void*)0)
-#endif
+/* VG_(brk) not public so skins cannot screw with curr_dataseg_end */
+extern void* VG_(brk) ( void* end_data_segment );
 
-extern void VG_(exit)( Int status )
-            __attribute__ ((__noreturn__));
+/* Skins use VG_(strdup)() which doesn't expose ArenaId */
+extern Char* VG_(arena_strdup) ( ArenaId aid, const Char* s);
 
-extern void VG_(printf) ( const char *format, ... );
-/* too noisy ...  __attribute__ ((format (printf, 1, 2))) ; */
-
-extern void VG_(sprintf) ( Char* buf, Char *format, ... );
-
-extern void VG_(vprintf) ( void(*send)(Char), 
-                          const Char *format, va_list vargs );
-
-extern Bool VG_(isspace) ( Char c );
-extern Bool VG_(isdigit) ( Char c );
-
-extern Int VG_(strlen) ( const Char* str );
-
-extern Long VG_(atoll) ( Char* str );
-extern Long VG_(atoll36) ( Char* str );
-
-extern Char* VG_(strcat) ( Char* dest, const Char* src );
-extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n );
-extern Char* VG_(strpbrk) ( const Char* s, const Char* accept );
-
-extern Char* VG_(strcpy) ( Char* dest, const Char* src );
-
-extern Int VG_(strcmp)    ( const Char* s1, const Char* s2 );
-extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 );
-
-extern Int VG_(strncmp)    ( const Char* s1, const Char* s2, Int nmax );
-extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax );
-
-extern Char* VG_(strstr) ( const Char* haystack, Char* needle );
-extern Char* VG_(strchr) ( const Char* s, Char c );
-extern Char* VG_(strdup) ( ArenaId aid, const Char* s);
-
-extern Char* VG_(getenv) ( Char* name );
-extern Int   VG_(getpid) ( void );
-
+/* Skins shouldn't need these...(?) */
 extern void VG_(start_rdtsc_calibration) ( void );
 extern void VG_(end_rdtsc_calibration) ( void );
 extern UInt VG_(read_millisecond_timer) ( void );
 
-
-extern Char VG_(toupper) ( Char c );
-
-extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest );
-
-extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest );
-
-extern Bool VG_(stringMatch) ( Char* pat, Char* str );
-
-
-#define VG__STRING(__str)  #__str
-
-/* Asserts are permanently enabled.  Hurrah! */
-#define vg_assert(expr)                                               \
-  ((void) ((expr) ? 0 :						      \
-	   (VG_(assert_fail) (VG__STRING(expr),			      \
-			      __FILE__, __LINE__,                     \
-                              __PRETTY_FUNCTION__), 0)))
-
-extern void VG_(assert_fail) ( Char* expr, Char* file, 
-                               Int line, Char* fn )
-            __attribute__ ((__noreturn__));
-
-/* Reading and writing files. */
-extern Int  VG_(open_read) ( Char* pathname );
-extern Int  VG_(open_write)       ( Char* pathname );
-extern Int  VG_(create_and_write) ( Char* pathname );
-extern void VG_(close)     ( Int fd );
-extern Int  VG_(read)      ( Int fd, void* buf, Int count);
-extern Int  VG_(write)     ( Int fd, void* buf, Int count);
-extern Int  VG_(stat) ( Char* file_name, struct vki_stat* buf );
-
-extern Int  VG_(fcntl) ( Int fd, Int cmd, Int arg );
-
+extern Int VG_(fcntl) ( Int fd, Int cmd, Int arg );
 extern Int VG_(select)( Int n, 
                         vki_fd_set* readfds, 
                         vki_fd_set* writefds, 
@@ -964,306 +753,37 @@
 extern Int VG_(nanosleep)( const struct vki_timespec *req, 
                            struct vki_timespec *rem );
 
-
-/* mmap-ery ... */
-extern void* VG_(mmap)( void* start, UInt length, 
-                        UInt prot, UInt flags, UInt fd, UInt offset );
-
-extern Int  VG_(munmap)( void* start, Int length );
-
-extern void* VG_(brk) ( void* end_data_segment );
-
-
-/* Print a (panic) message, and abort. */
-extern void VG_(panic) ( Char* str )
-            __attribute__ ((__noreturn__));
-
-/* Get memory by anonymous mmap. */
-extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
-
-/* Crude stand-in for the glibc system() call. */
-extern Int VG_(system) ( Char* cmd );
-
-
-/* Signal stuff.  Note that these use the vk_ (kernel) structure
-   definitions, which are different in places from those that glibc
-   defines.  Since we're operating right at the kernel interface,
-   glibc's view of the world is entirely irrelevant. */
-
-/* --- Signal set ops --- */
-extern Int  VG_(ksigfillset)( vki_ksigset_t* set );
-extern Int  VG_(ksigemptyset)( vki_ksigset_t* set );
-
-extern Bool VG_(kisfullsigset)( vki_ksigset_t* set );
-extern Bool VG_(kisemptysigset)( vki_ksigset_t* set );
-
-extern Int  VG_(ksigaddset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigdelset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigismember) ( vki_ksigset_t* set, Int signum );
-
-extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-
-/* --- Mess with the kernel's sig state --- */
-extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, 
-                                       vki_ksigset_t* oldset );
-extern Int VG_(ksigaction) ( Int signum,  
-                             const vki_ksigaction* act,  
-                             vki_ksigaction* oldact );
-
-extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int));
-
-extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss );
-
-extern Int VG_(kill)( Int pid, Int signo );
-extern Int VG_(sigpending) ( vki_ksigset_t* set );
-
-
 /* ---------------------------------------------------------------------
    Definitions for the JITter (vg_translate.c, vg_to_ucode.c,
    vg_from_ucode.c).
    ------------------------------------------------------------------ */
 
-/* Tags which describe what operands are. */
-typedef
-   enum { TempReg=0, ArchReg=1, RealReg=2, 
-          SpillNo=3, Literal=4, Lit16=5, 
-          NoValue=6 }
-   Tag;
-
-
-/* Microinstruction opcodes. */
-typedef
-   enum {
-      NOP,
-      GET,
-      PUT,
-      LOAD,
-      STORE,
-      MOV,
-      CMOV, /* Used for cmpxchg and cmov */
-      WIDEN,
-      JMP,
-
-      /* Read/write the %EFLAGS register into a TempReg. */
-      GETF, PUTF,
-
-      ADD, ADC, AND, OR,  XOR, SUB, SBB,
-      SHL, SHR, SAR, ROL, ROR, RCL, RCR,
-      NOT, NEG, INC, DEC, BSWAP,
-      CC2VAL,
-
-      /* Not strictly needed, but useful for making better
-         translations of address calculations. */
-      LEA1,  /* reg2 := const + reg1 */
-      LEA2,  /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */
-
-      /* not for translating x86 calls -- only to call helpers */
-      CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences
-                           for CALLM. */
-      PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */
-      CALLM,  /* call to a machine-code helper */
-
-      /* for calling C functions -- CCALL_M_N passes M arguments and returns N
-       * (0 or 1) return values */
-      CCALL_1_0, CCALL_2_0,
-
-      /* Hack for translating string (REP-) insns.  Jump to literal if
-         TempReg/RealReg is zero. */
-      JIFZ,
-
-      /* FPU ops which read/write mem or don't touch mem at all. */
-      FPU_R,
-      FPU_W,
-      FPU,
-
-      /* Advance the simulated %eip by some small (< 128) number. */
-      INCEIP,
-
-      /* uinstrs which are not needed for mere translation of x86 code,
-         only for instrumentation of it. */
-      LOADV,
-      STOREV,
-      GETV,
-      PUTV,
-      TESTV,
-      SETV,
-      /* Get/set the v-bit (and it is only one bit) for the simulated
-         %eflags register. */
-      GETVF,
-      PUTVF,
-
-      /* Do a unary or binary tag op.  Only for post-instrumented
-         code.  For TAG1, first and only arg is a TempReg, and is both
-         arg and result reg.  For TAG2, first arg is src, second is
-         dst, in the normal way; both are TempRegs.  In both cases,
-         3rd arg is a RiCHelper with a Lit16 tag.  This indicates
-         which tag op to do. */
-      TAG1,
-      TAG2
-   }
-   Opcode;
-
-
-/* Condition codes, observing the Intel encoding.  CondAlways is an
-   extra. */
-typedef
-   enum {
-      CondO      = 0,  /* overflow           */
-      CondNO     = 1,  /* no overflow        */
-      CondB      = 2,  /* below              */
-      CondNB     = 3,  /* not below          */
-      CondZ      = 4,  /* zero               */
-      CondNZ     = 5,  /* not zero           */
-      CondBE     = 6,  /* below or equal     */
-      CondNBE    = 7,  /* not below or equal */
-      CondS      = 8,  /* negative           */
-      ConsNS     = 9,  /* not negative       */
-      CondP      = 10, /* parity even        */
-      CondNP     = 11, /* not parity even    */
-      CondL      = 12, /* jump less          */
-      CondNL     = 13, /* not less           */
-      CondLE     = 14, /* less or equal      */
-      CondNLE    = 15, /* not less or equal  */
-      CondAlways = 16  /* Jump always        */
-   } 
-   Condcode;
-
-
-/* Descriptions of additional properties of *unconditional* jumps. */
-typedef
-   enum {
-     JmpBoring=0,   /* boring unconditional jump */
-     JmpCall=1,     /* jump due to an x86 call insn */
-     JmpRet=2,      /* jump due to an x86 ret insn */
-     JmpSyscall=3,  /* do a system call, then jump */
-     JmpClientReq=4 /* do a client request, then jump */
-   }
-   JmpKind;
-
-
-/* Flags.  User-level code can only read/write O(verflow), S(ign),
-   Z(ero), A(ux-carry), C(arry), P(arity), and may also write
-   D(irection).  That's a total of 7 flags.  A FlagSet is a bitset,
-   thusly: 
-      76543210
-       DOSZACP
-   and bit 7 must always be zero since it is unused.
-*/
-typedef UChar FlagSet;
-
-#define FlagD (1<<6)
-#define FlagO (1<<5)
-#define FlagS (1<<4)
-#define FlagZ (1<<3)
-#define FlagA (1<<2)
-#define FlagC (1<<1)
-#define FlagP (1<<0)
-
-#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsOSZAP  (FlagO | FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsOSZCP  (FlagO | FlagS | FlagZ |         FlagC | FlagP)
-#define FlagsOSACP  (FlagO | FlagS |         FlagA | FlagC | FlagP)
-#define FlagsSZACP  (        FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsSZAP   (        FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsZCP    (                FlagZ         | FlagC | FlagP)
-#define FlagsOC     (FlagO |                         FlagC        )
-#define FlagsAC     (                        FlagA | FlagC        )
-
-#define FlagsALL    (FlagsOSZACP | FlagD)
-#define FlagsEmpty  (FlagSet)0
-
 #define VG_IS_FLAG_SUBSET(set1,set2) \
    (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) )
 
 #define VG_UNION_FLAG_SETS(set1,set2) \
    ( ((FlagSet)set1) | ((FlagSet)set2) )
 
-
-
-/* A Micro (u)-instruction. */
-typedef
-   struct {
-      /* word 1 */
-      UInt    lit32;      /* 32-bit literal */
-
-      /* word 2 */
-      UShort  val1;       /* first operand */
-      UShort  val2;       /* second operand */
-
-      /* word 3 */
-      UShort  val3;       /* third operand */
-      UChar   opcode;     /* opcode */
-      UChar   size;       /* data transfer size */
-
-      /* word 4 */
-      FlagSet flags_r;    /* :: FlagSet */
-      FlagSet flags_w;    /* :: FlagSet */
-      UChar   tag1:4;     /* first  operand tag */
-      UChar   tag2:4;     /* second operand tag */
-      UChar   tag3:4;     /* third  operand tag */
-      UChar   extra4b:4;  /* Spare field, used by WIDEN for src
-                             -size, and by LEA2 for scale 
-                             (1,2,4 or 8), and by unconditional JMPs for
-                             orig x86 instr size if --cachesim=yes */
-
-
-      /* word 5 */
-      UChar   cond;            /* condition, for jumps */
-      Bool    smc_check:1;     /* do a smc test, if writes memory. */
-      Bool    signed_widen:1;  /* signed or unsigned WIDEN ? */
-      JmpKind jmpkind:3;       /* additional properties of unconditional JMP */
-   }
-   UInstr;
-
-
-/* Expandable arrays of uinstrs. */
-typedef 
-   struct { 
-      Int     used; 
-      Int     size; 
-      UInstr* instrs;
-      Int     nextTemp;
-   }
-   UCodeBlock;
-
-/* Refer to `the last instruction stuffed in', including as an
-   lvalue. */
-#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1]
-
-/* An invalid temporary number :-) */
-#define INVALID_TEMPREG 999999999
-
-
 /* ---------------------------------------------------------------------
    Exports of vg_demangle.c
    ------------------------------------------------------------------ */
 
 extern void VG_(demangle) ( Char* orig, Char* result, Int result_size );
 
-
 /* ---------------------------------------------------------------------
    Exports of vg_from_ucode.c
    ------------------------------------------------------------------ */
 
 extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes );
 
+extern void   VG_(print_ccall_stats)      ( void );
+extern void   VG_(print_UInstr_histogram) ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_to_ucode.c
    ------------------------------------------------------------------ */
 
 extern Int   VG_(disBB)          ( UCodeBlock* cb, Addr eip0 );
-extern Char* VG_(nameOfIntReg)   ( Int size, Int reg );
-extern Char  VG_(nameOfIntSize)  ( Int size );
-extern UInt  VG_(extend_s_8to32) ( UInt x );
-extern Int   VG_(getNewTemp)     ( UCodeBlock* cb );
-extern Int   VG_(getNewShadow)   ( UCodeBlock* cb );
-
-#define SHADOW(tempreg)  ((tempreg)+1)
-
 
 /* ---------------------------------------------------------------------
    Exports of vg_translate.c
@@ -1275,41 +795,11 @@
                                Addr* trans_addr,
                                UInt* trans_size );
 
-extern void  VG_(emptyUInstr) ( UInstr* u );
-extern void  VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz );
-extern void  VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1 );
-extern void  VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2 );
-extern void  VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2,
-                               Tag tag3, UInt val3 );
-extern void VG_(setFlagRW) ( UInstr* u, 
-                             FlagSet fr, FlagSet fw );
-
-extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
-extern Bool VG_(anyFlagUse) ( UInstr* u );
-
-
-
-extern void  VG_(ppUInstr)        ( Int instrNo, UInstr* u );
-extern void  VG_(ppUCodeBlock)    ( UCodeBlock* cb, Char* title );
-
-extern UCodeBlock* VG_(allocCodeBlock) ( void );
-extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
-extern void  VG_(copyUInstr)                ( UCodeBlock* cb, UInstr* instr );
-
-extern Char* VG_(nameCondcode)    ( Condcode cond );
-extern Bool  VG_(saneUInstr)      ( Bool beforeRA, UInstr* u );
-extern Bool  VG_(saneUCodeBlock)  ( UCodeBlock* cb );
-extern Char* VG_(nameUOpcode)     ( Bool upper, Opcode opc );
-extern Int   VG_(rankToRealRegNo) ( Int rank );
-
-extern void* VG_(jitmalloc) ( Int nbytes );
-extern void  VG_(jitfree)   ( void* ptr );
-
+extern Char* VG_(nameCondcode)        ( Condcode cond );
+extern Bool  VG_(saneUInstr)          ( Bool beforeRA, Bool beforeLiveness,
+                                        UInstr* u );
+extern void  VG_(saneUCodeBlock)      ( UCodeBlock* cb );
+extern Bool  VG_(saneUCodeBlockCalls) ( UCodeBlock* cb );
 
 /* ---------------------------------------------------------------------
    Exports of vg_execontext.c.
@@ -1320,15 +810,13 @@
    comparing against suppression specifications.  The rest are purely
    informational (but often important). */
 
-typedef
-   struct _ExeContextRec {
-      struct _ExeContextRec * next;
-      /* The size of this array is VG_(clo_backtrace_size); at least
-         2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
-         [1] is its caller, [2] is the caller of [1], etc. */
-      Addr eips[0];
-   }
-   ExeContext;
+struct _ExeContext {
+   struct _ExeContext * next;
+   /* Variable-length array.  The size is VG_(clo_backtrace_size); at
+      least 2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
+      [1] is its caller, [2] is the caller of [1], etc. */
+   Addr eips[0];
+};
 
 
 /* Initialise the ExeContext storage mechanism. */
@@ -1337,91 +825,86 @@
 /* Print stats (informational only). */
 extern void VG_(show_ExeContext_stats) ( void );
 
-
-/* Take a snapshot of the client's stack.  Search our collection of
-   ExeContexts to see if we already have it, and if not, allocate a
-   new one.  Either way, return a pointer to the context. */
-extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                         Addr eip, Addr ebp );
-
-/* Print an ExeContext. */
-extern void VG_(pp_ExeContext) ( ExeContext* );
-
-/* Compare two ExeContexts, just comparing the top two callers. */
-extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, just comparing the top four callers. */
-extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, comparing all callers. */
-extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 );
-
+/* Like VG_(get_ExeContext), but with a slightly different type */
+extern ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                          Addr ebp_min, Addr ebp_max );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_errcontext.c.
    ------------------------------------------------------------------ */
 
-extern void VG_(load_suppressions)    ( void );
-extern void VG_(show_all_errors)      ( void );
-extern void VG_(record_value_error)   ( Int size );
-extern void VG_(record_free_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_freemismatch_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_address_error) ( Addr a, Int size, 
-                                        Bool isWrite );
-
-extern void VG_(record_jump_error) ( ThreadState* tst, Addr a );
-
-extern void VG_(record_param_err) ( ThreadState* tst,
-                                    Addr a, 
-                                    Bool isWriteLack, 
-                                    Char* msg );
-extern void VG_(record_user_err) ( ThreadState* tst,
-                                   Addr a, Bool isWriteLack );
-extern void VG_(record_pthread_err) ( ThreadId tid, Char* msg );
-
-
-
-/* The classification of a faulting address. */
-typedef 
-   enum { Undescribed, /* as-yet unclassified */
-          Stack, 
-          Unknown, /* classification yielded nothing useful */
-          Freed, Mallocd, 
-          UserG, UserS }
-   AddrKind;
-
-/* Records info about a faulting address. */
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
 typedef
-   struct {
-      /* ALL */
-      AddrKind akind;
-      /* Freed, Mallocd */
-      Int blksize;
-      /* Freed, Mallocd */
-      Int rwoffset;
-      /* Freed, Mallocd */
-      ExeContext* lastchange;
-      /* Stack */
-      ThreadId stack_tid;
-      /* True if is just-below %esp -- could be a gcc bug. */
-      Bool maybe_gcc;
+   enum {
+      PThreadSupp = -1,    /* Matches PThreadErr */
    }
-   AddrInfo;
+   CoreSuppKind;
+
+/* For each caller specified for a suppression, record the nature of
+   the caller name.  Not of interest to skins. */
+typedef
+   enum { 
+      ObjName,    /* Name is of an shared object file. */
+      FunName     /* Name is of a function. */
+   }
+   SuppLocTy;
+
+/* Suppressions.  Skin part `SkinSupp' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreSupp {
+      struct _CoreSupp* next;
+      /* The number of times this error has been suppressed. */
+      Int count;
+      /* The name by which the suppression is referred to. */
+      Char* sname;
+      /* First two (name of fn where err occurs, and immediate caller)
+       * are mandatory;  extra two are optional. */
+      SuppLocTy caller_ty[VG_N_SUPP_CALLERS];
+      Char*     caller   [VG_N_SUPP_CALLERS];
+      /* The skin-specific part */
+      SkinSupp  skin_supp;
+   } 
+   CoreSupp;
+
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
+typedef
+   enum { 
+      PThreadErr      = -1,   /* Pthreading error */
+   }
+   CoreErrorKind;
+
+/* Errors.  Skin part `SkinError' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreErrContext {
+      struct _CoreErrContext* next;
+      /* NULL if unsuppressed; or ptr to suppression record. */
+      CoreSupp* supp;
+      Int count;
+      ExeContext* where;
+      ThreadId tid;
+      /* These record %EIP, %ESP and %EBP at the error point.  They
+         are only used to make GDB-attaching convenient; there is no
+         other purpose; specifically they are not used to do
+         comparisons between errors. */
+      UInt m_eip;
+      UInt m_esp;
+      UInt m_ebp;
+      /* The skin-specific part */
+      SkinError skin_err;
+   } 
+   CoreError;
 
 
-/* ---------------------------------------------------------------------
-   Exports of vg_clientperms.c
-   ------------------------------------------------------------------ */
+extern void VG_(load_suppressions)    ( void );
 
-extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );
+extern void VG_(record_pthread_error) ( ThreadId tid, Char* msg );
 
-extern UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block );
-
-extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void );
-
-extern void VG_(show_client_block_stats) ( void );
-
+extern void VG_(show_all_errors)      ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_procselfmaps.c
@@ -1438,52 +921,26 @@
    ------------------------------------------------------------------ */
 
 /* We assume the executable is loaded here ... can't really find
-   out.  There is a hacky sanity check in vg_init_memory_audit()
+   out.  There is a hacky sanity check in VG_(init_memory)()
    which should trip up most stupidities.
 */
 #define VG_ASSUMED_EXE_BASE  (Addr)0x8048000
 
-extern void VG_(read_symbols) ( void );
-extern void VG_(mini_stack_dump) ( ExeContext* ec );
-extern void VG_(what_obj_and_fun_is_this)
-                                     ( Addr a,
-                                       Char* obj_buf, Int n_obj_buf,
-                                       Char* fun_buf, Int n_fun_buf );
-extern Bool VG_(what_line_is_this) ( Addr a,
-                                     UChar* filename, Int n_filename,
-                                     UInt* lineno );
-extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
-                                     Char* fn_name, Int n_fn_name);
+extern void VG_(maybe_read_symbols)   ( void );
+extern void VG_(read_symtab_callback) ( Addr start, UInt size, 
+                                        Char rr, Char ww, Char xx,
+                                        UInt foffset, UChar* filename );
+extern void VG_(maybe_unload_symbols) ( Addr start, UInt length );
 
-extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(get_fnname_nodemangle)( Addr a, Char* fnname, Int n_fnname );
+extern void VG_(mini_stack_dump)      ( ExeContext* ec );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_clientmalloc.c
    ------------------------------------------------------------------ */
 
-typedef
-   enum { 
-      Vg_AllocMalloc = 0,
-      Vg_AllocNew    = 1,
-      Vg_AllocNewVec = 2 
-   }
-   VgAllocKind;
-
-/* Description of a malloc'd chunk. */
-typedef 
-   struct _ShadowChunk {
-      struct _ShadowChunk* next;
-      ExeContext*   where;          /* where malloc'd/free'd */
-      UInt          size : 30;      /* size requested.       */
-      VgAllocKind   allockind : 2;  /* which wrapper did the allocation */
-      Addr          data;           /* ptr to actual block.  */
-   } 
-   ShadowChunk;
-
-extern void          VG_(clientmalloc_done) ( void );
-extern void          VG_(describe_addr) ( Addr a, AddrInfo* ai );
-extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows );
+extern void  VG_(client_malloc_init)();
 
 /* These are called from the scheduler, when it intercepts a user
    request. */
@@ -1503,11 +960,14 @@
    Exports of vg_main.c
    ------------------------------------------------------------------ */
 
+/* Sanity checks which may be done at any time.  The scheduler decides when. */
+extern void VG_(do_sanity_checks) ( Bool force_expensive );
+
 /* A structure used as an intermediary when passing the simulated
    CPU's state to some assembly fragments, particularly system calls.
    Stuff is copied from baseBlock to here, the assembly magic runs,
-   and then the inverse copy is done. */
-
+   and then the inverse copy is done. 
+ */
 extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ 
                                  + 1 /* %eflags */ 
                                  + 1 /* %eip */
@@ -1520,30 +980,27 @@
 
 /* Called when some unhandleable client behaviour is detected.
    Prints a msg and aborts. */
-extern void VG_(unimplemented) ( Char* msg );
+extern void VG_(unimplemented) ( Char* msg )
+            __attribute__((__noreturn__));
 extern void VG_(nvidia_moan) ( void );
 
 /* The stack on which Valgrind runs.  We can't use the same stack as the
    simulatee -- that's an important design decision.  */
 extern UInt VG_(stack)[10000];
 
-/* Similarly, we have to ask for signals to be delivered on an
-   alternative stack, since it is possible, although unlikely, that
-   we'll have to run client code from inside the Valgrind-installed
-   signal handler.  If this happens it will be done by
-   vg_deliver_signal_immediately(). */
+/* Similarly, we have to ask for signals to be delivered on an alternative
+   stack, since it is possible, although unlikely, that we'll have to run
+   client code from inside the Valgrind-installed signal handler.  If this
+   happens it will be done by vg_deliver_signal_immediately(). */
 extern UInt VG_(sigstack)[10000];
 
 /* Holds client's %esp at the point we gained control.  From this the
    client's argc, argv and envp are deduced. */
 extern Addr   VG_(esp_at_startup);
-extern Int    VG_(client_argc);
-extern Char** VG_(client_argv);
-extern Char** VG_(client_envp);
 
-/* Remove valgrind.so from a LD_PRELOAD=... string so child processes
-   don't get traced into.  Also mess up $libdir/valgrind so that our
-   libpthread.so disappears from view. */
+/* Remove valgrind.so and skin's .so from a LD_PRELOAD=... string so child
+   processes don't get traced into.  Also mess up $libdir/valgrind so that
+   our libpthread.so disappears from view. */
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str );
 
@@ -1553,9 +1010,6 @@
    the client program really was running on the real cpu. */
 extern void VG_(start_GDB_whilst_on_client_stack) ( void );
 
-/* Spew out vast amounts of junk during JITting? */
-extern Bool  VG_(disassemble);
-
 /* 64-bit counter for the number of basic blocks done. */
 extern ULong VG_(bbs_done);
 /* 64-bit counter for the number of bbs to go before a debug exit. */
@@ -1573,6 +1027,11 @@
 /* This is the ThreadId of the last thread the scheduler ran. */
 extern ThreadId VG_(last_run_tid);
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+extern UInt VG_(exitcode);
+
 
 /* --- Counters, for informational purposes only. --- */
 
@@ -1628,83 +1087,38 @@
    Exports of vg_memory.c
    ------------------------------------------------------------------ */
 
-extern void VGM_(init_memory_audit) ( void );
-extern Addr VGM_(curr_dataseg_end);
-extern void VG_(show_reg_tags) ( void );
-extern void VG_(detect_memory_leaks) ( void );
-extern void VG_(done_prof_mem) ( void );
+extern void VG_(init_memory)            ( void );
+extern void VG_(new_exe_segment)        ( Addr a, UInt len );
+extern void VG_(remove_if_exe_segment)  ( Addr a, UInt len );
 
-/* Set permissions for an address range.  Not speed-critical. */
-extern void VGM_(make_noaccess) ( Addr a, UInt len );
-extern void VGM_(make_writable) ( Addr a, UInt len );
-extern void VGM_(make_readable) ( Addr a, UInt len );
-/* Use with care! (read: use for shmat only) */
-extern void VGM_(make_readwritable) ( Addr a, UInt len );
-extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst,
-                                             UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-/* Sanity checks which may be done at any time.  The scheduler decides
-   when. */
-extern void VG_(do_sanity_checks) ( Bool force_expensive );
-/* Very cheap ... */
-extern Bool VG_(first_and_last_secondaries_look_plausible) ( void );
-
-/* These functions are called from generated code. */
-extern void VG_(helperc_STOREV4) ( UInt, Addr );
-extern void VG_(helperc_STOREV2) ( UInt, Addr );
-extern void VG_(helperc_STOREV1) ( UInt, Addr );
-
-extern UInt VG_(helperc_LOADV1) ( Addr );
-extern UInt VG_(helperc_LOADV2) ( Addr );
-extern UInt VG_(helperc_LOADV4) ( Addr );
-
-extern void VGM_(handle_esp_assignment) ( Addr new_espA );
-extern void VGM_(fpu_write_check) ( Addr addr, Int size );
-extern void VGM_(fpu_read_check)  ( Addr addr, Int size );
-
-/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
-   space and pass the addresses and values of all addressible,
-   defined, aligned words to notify_word.  This is the basis for the
-   leak detector.  Returns the number of calls made to notify_word.  */
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) );
-
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-extern Bool VG_(is_just_below_ESP)( Addr esp, Addr aa );
+/* Called from generated code. */
+extern void VG_(handle_esp_assignment) ( Addr new_espA );
 
 /* Nasty kludgery to deal with applications which switch stacks,
    like netscape. */
 #define VG_PLAUSIBLE_STACK_SIZE 8000000
 
-/* Needed by the pthreads implementation. */
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-
-
 /* ---------------------------------------------------------------------
-   Exports of vg_syscall_mem.c
+   Exports of vg_syscalls.c
    ------------------------------------------------------------------ */
 
+extern void VG_(init_dataseg_end_for_brk) ( void );
+
 extern void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid );
 
-extern void VG_(check_known_blocking_syscall) ( ThreadId tid, 
-                                                Int syscallno,
-                                                Int* /*IN*/ res );
+extern void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno );
+extern void  VG_(post_known_blocking_syscall)( ThreadId tid, Int syscallno,
+                                               void* pre_res, Int res );
 
 extern Bool VG_(is_kerror) ( Int res );
 
-#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)        \
-         VG_(load_thread_state)(thread_id);                \
-         VG_(copy_baseBlock_to_m_state_static)();          \
-         VG_(do_syscall)();                                \
-         VG_(copy_m_state_static_to_baseBlock)();          \
-         VG_(save_thread_state)(thread_id);                \
-         VG_(threads)[thread_id].sh_eax = VGM_WORD_VALID;  \
+#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)               \
+         VG_(load_thread_state)(thread_id);                       \
+         VG_(copy_baseBlock_to_m_state_static)();                 \
+         VG_(do_syscall)();                                       \
+         VG_(copy_m_state_static_to_baseBlock)();                 \
+         VG_(save_thread_state)(thread_id);                       \
+         VG_(threads)[thread_id].sh_eax = VG_(written_shadow_reg);\
          result_lvalue = VG_(threads)[thread_id].m_eax;
 
 
@@ -1726,6 +1140,9 @@
 /* The number of basic blocks in an epoch (one age-step). */
 #define VG_BBS_PER_EPOCH 20000
 
+/* The fast-cache for tt-lookup. */
+extern Addr VG_(tt_fast)[VG_TT_FAST_SIZE];
+
 extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used );
 extern void VG_(maybe_do_lru_pass) ( void );
 extern void VG_(flush_transtab) ( void );
@@ -1742,40 +1159,6 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_vtagops.c
-   ------------------------------------------------------------------ */
-
-/* Lists the names of value-tag operations used in instrumented
-   code.  These are the third argument to TAG1 and TAG2 uinsns. */
-
-typedef
-   enum { 
-     /* Unary. */
-     VgT_PCast40, VgT_PCast20, VgT_PCast10,
-     VgT_PCast01, VgT_PCast02, VgT_PCast04,
-
-     VgT_PCast14, VgT_PCast12, VgT_PCast11,
-
-     VgT_Left4, VgT_Left2, VgT_Left1,
-
-     VgT_SWiden14, VgT_SWiden24, VgT_SWiden12,
-     VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12,
-
-     /* Binary; 1st is rd; 2nd is rd+wr */
-     VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0,
-     VgT_DifD4, VgT_DifD2, VgT_DifD1,
-
-     VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, 
-     VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ,
-     VgT_DebugFn
-   }
-   VgTagOp;
-
-extern Char* VG_(nameOfTagOp) ( VgTagOp );
-extern UInt VG_(DebugFn) ( UInt a1, UInt a2 );
-
-
-/* ---------------------------------------------------------------------
    Exports of vg_syscall.S
    ------------------------------------------------------------------ */
 
@@ -1844,60 +1227,24 @@
 extern void VG_(helper_DAS);
 extern void VG_(helper_DAA);
 
-extern void VG_(helper_value_check4_fail);
-extern void VG_(helper_value_check2_fail);
-extern void VG_(helper_value_check1_fail);
-extern void VG_(helper_value_check0_fail);
-
 /* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
 extern void VG_(signalreturn_bogusRA)( void );
 
-
 /* ---------------------------------------------------------------------
-   Exports of vg_cachesim.c
+   Things relating to the used skin
    ------------------------------------------------------------------ */
 
-extern Int VG_(log2) ( Int x );
-
-extern UCodeBlock* VG_(cachesim_instrument) ( UCodeBlock* cb_in, 
-                                              Addr orig_addr );
-
-typedef struct  _iCC  iCC;
-typedef struct _idCC idCC;
-
-extern void VG_(init_cachesim)      ( void );
-extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv );
-
-extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
-extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
-
-extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+#define VG_TRACK(fn, args...)          \
+   do {                                \
+      if (VG_(track_events).fn)        \
+         VG_(track_events).fn(args);   \
+   } while (0)
 
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
 
-/* This is the Intel register encoding. */
-#define R_EAX 0
-#define R_ECX 1
-#define R_EDX 2
-#define R_EBX 3
-#define R_ESP 4
-#define R_EBP 5
-#define R_ESI 6
-#define R_EDI 7
-
-#define R_AL (0+R_EAX)
-#define R_CL (0+R_ECX)
-#define R_DL (0+R_EDX)
-#define R_BL (0+R_EBX)
-#define R_AH (4+R_EAX)
-#define R_CH (4+R_ECX)
-#define R_DH (4+R_EDX)
-#define R_BH (4+R_EBX)
-
-
 /* ---------------------------------------------------------------------
    Offsets into baseBlock for everything which needs to referred to
    from generated code.  The order of these decls does not imply 
@@ -1948,7 +1295,6 @@
 extern Int VGOFF_(sh_edi);
 extern Int VGOFF_(sh_eflags);
 
-
 /* -----------------------------------------------------
    Read-only parts of baseBlock.
    -------------------------------------------------- */
@@ -1993,25 +1339,22 @@
 extern Int VGOFF_(helper_DAS);
 extern Int VGOFF_(helper_DAA);
 
-extern Int VGOFF_(helper_value_check4_fail);
-extern Int VGOFF_(helper_value_check2_fail);
-extern Int VGOFF_(helper_value_check1_fail);
-extern Int VGOFF_(helper_value_check0_fail);
-
-extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */
-
-extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */
-
 extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */
-extern Int VGOFF_(fpu_write_check);       /* :: Addr -> Int -> void */
-extern Int VGOFF_(fpu_read_check);        /* :: Addr -> Int -> void */
 
-extern Int VGOFF_(cachesim_log_non_mem_instr);
-extern Int VGOFF_(cachesim_log_mem_instr);
+/* For storing extension-specific helpers, determined at runtime.  The addr 
+ * and offset arrays together form a (addr, offset) map that allows a 
+ * helper's baseBlock offset to be computed from its address.  It's done 
+ * like this so CCALL_M_Ns and other helper calls can use the function 
+ * address rather than having to muck around with offsets. */
+extern UInt VG_(n_compact_helpers);
+extern UInt VG_(n_noncompact_helpers);
+
+extern Addr VG_(compact_helper_addrs)  [];
+extern Int  VG_(compact_helper_offsets)[];
+
+extern Addr VG_(noncompact_helper_addrs)  [];
+extern Int  VG_(noncompact_helper_offsets)[];
+
 
 #endif /* ndef __VG_INCLUDE_H */
 
diff --git a/coregrind/vg_instrument.c b/coregrind/vg_instrument.c
new file mode 100644
index 0000000..9a062ee
--- /dev/null
+++ b/coregrind/vg_instrument.c
@@ -0,0 +1,96 @@
+/*--------------------------------------------------------------------*/
+/*--- Higher-level UCode sequence builders                         ---*/
+/*---                                              vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+// SSS: should this file eventually not be in core, but be included in
+// skins that use it??  Reduces size of core, but increases size of every
+// skin that uses it...
+
+/* We only import vg_skin.h here, because this file only provides functions
+   for doing things that could be done directly by the skin -- it's just to
+   make skins' lives easier, rather than let them do something they
+   couldn't otherwise do. */
+#include "vg_skin.h"
+
+#define uInstr0   VG_(newUInstr0)
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+
+
+void VG_(callHelper_0_0)(UCodeBlock* cb, Addr f)
+{
+   uInstr0(cb, CCALL, 0);
+   uCCall(cb, f, 0, 0, 0);
+}
+
+void VG_(callHelper_1_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+
+   vg_assert(regparms_n <= 1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr1(cb, CCALL, 0, TempReg, t1);
+   uCCall(cb, f, 1, regparms_n, 0);
+}
+
+void VG_(callHelper_2_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt arg2,
+                         UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+   UInt t2 = newTemp(cb);
+
+   vg_assert(regparms_n <= 2);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
+   uLiteral(cb, arg2);
+   uInstr2(cb, CCALL, 0, TempReg, t1, TempReg, t2);
+   uCCall(cb, f, 2, regparms_n, 0);
+}
+
+void VG_(set_global_var)(UCodeBlock* cb, Addr globvar_ptr, UInt val)
+{
+   Int t_gv  = newTemp(cb);        
+   Int t_val = newTemp(cb);        
+
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_val);
+   uLiteral(cb, val);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_gv);
+   uLiteral(cb, globvar_ptr);
+   uInstr2(cb, STORE, 4, TempReg, t_val, TempReg, t_gv);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/coregrind/vg_kerneliface.h b/coregrind/vg_kerneliface.h
index bcc10f5..ede3049 100644
--- a/coregrind/vg_kerneliface.h
+++ b/coregrind/vg_kerneliface.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_KERNELIFACE_H
@@ -139,6 +139,40 @@
 #define VKI_MAP_PRIVATE    0x02            /* Changes are private.  */
 #define VKI_MAP_FIXED      0x10            /* Interpret addr exactly */
 
+/* Copied from linux-2.4.19/include/asm-i386/fcntl.h */
+
+#define VKI_O_RDONLY             00
+#define VKI_O_WRONLY             01
+#define VKI_O_RDWR               02
+#define VKI_O_CREAT            0100 /* not fcntl */
+#define VKI_O_EXCL             0200 /* not fcntl */
+#define VKI_O_TRUNC           01000 /* not fcntl */
+#define VKI_O_APPEND          02000
+#define VKI_O_NONBLOCK        04000
+#define VKI_O_SYNC           010000
+#define VKI_FASYNC           020000 /* fcntl, for BSD compatibility */
+#define VKI_O_DIRECT         040000 /* direct disk access hint */
+#define VKI_O_LARGEFILE     0100000
+#define VKI_O_DIRECTORY     0200000 /* must be a directory */
+#define VKI_O_NOFOLLOW      0400000 /* don't follow links */
+
+/* Copied from linux-2.4.19/include/linux/stat.h */
+
+#define VKI_S_IRWXU 00700
+#define VKI_S_IRUSR 00400
+#define VKI_S_IWUSR 00200
+#define VKI_S_IXUSR 00100
+
+#define VKI_S_IRWXG 00070
+#define VKI_S_IRGRP 00040
+#define VKI_S_IWGRP 00020
+#define VKI_S_IXGRP 00010
+
+#define VKI_S_IRWXO 00007
+#define VKI_S_IROTH 00004
+#define VKI_S_IWOTH 00002
+#define VKI_S_IXOTH 00001
+
 
 /* Copied from /usr/src/linux-2.4.9-13/include/asm/errno.h */
 
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index 994cdb7..5972dfa 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ALL THIS CODE RUNS ON THE SIMULATED CPU.
@@ -257,6 +257,12 @@
    return 0;
 }
 
+int pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate)
+{
+   *detachstate = attr->__detachstate;
+   return 0;
+}
+
 int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit)
 {
    static int moans = N_MOANS;
@@ -1044,6 +1050,7 @@
 void __my_pthread_testcancel(void)
 {
    int res;
+   ensure_valgrind("__my_pthread_testcancel");
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__TESTCANCEL,
                            0, 0, 0, 0);
@@ -1178,7 +1185,7 @@
       if (n_now != n_orig) break;
 
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
+      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -1381,13 +1388,14 @@
 /* Relies on assumption that initial private data is NULL.  This
    should be fixed somehow. */
 
-/* The allowable keys (indices) (all 2 of them). 
+/* The allowable keys (indices) (all 3 of them). 
    From sysdeps/pthread/bits/libc-tsd.h
 */
-#define N_LIBC_TSD_EXTRA_KEYS 1
+#define N_LIBC_TSD_EXTRA_KEYS 0
 
 enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
                         _LIBC_TSD_KEY_DL_ERROR,
+                        _LIBC_TSD_KEY_RPC_VARS,
                         _LIBC_TSD_KEY_N };
 
 /* Auto-initialising subsystem.  libc_specifics_inited is set 
@@ -1877,6 +1885,10 @@
 }
 
 
+pid_t __vfork(void)
+{
+   return __fork();
+}
 
 
 /* ---------------------------------------------------------------------
@@ -1965,7 +1977,7 @@
    Basic idea is: modify the timeout parameter to select so that it
    returns immediately.  Poll like this until select returns non-zero,
    indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 20 milliseconds, which
+   Space out the polls with nanosleeps of say 11 milliseconds, which
    is required to be nonblocking; this allows other threads to run.  
 
    Assumes:
@@ -2083,7 +2095,7 @@
       /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */
+      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       res = my_do_syscall2(__NR_nanosleep, 
@@ -2193,7 +2205,7 @@
       /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */
+      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -2810,6 +2822,7 @@
 weak_alias (__pread64, pread64)
 weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
+weak_alias(__vfork, vfork)
 
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
diff --git a/coregrind/vg_libpthread_unimp.c b/coregrind/vg_libpthread_unimp.c
index f413887..f3938ec 100644
--- a/coregrind/vg_libpthread_unimp.c
+++ b/coregrind/vg_libpthread_unimp.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ---------------------------------------------------------------------
@@ -82,7 +82,7 @@
 //void longjmp ( void )  { unimp("longjmp"); }
 //void pthread_atfork ( void )  { unimp("pthread_atfork"); }
 //void pthread_attr_destroy ( void )  { unimp("pthread_attr_destroy"); }
-void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
+//void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
 void pthread_attr_getinheritsched ( void )  { unimp("pthread_attr_getinheritsched"); }
 //void pthread_attr_getschedparam ( void )  { unimp("pthread_attr_getschedparam"); }
 //void pthread_attr_getschedpolicy ( void )  { unimp("pthread_attr_getschedpolicy"); }
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 5cce13d..582b652 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -26,12 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /* ---------------------------------------------------------------------
    Compute offsets into baseBlock.  See comments in vg_include.h.
@@ -62,6 +60,7 @@
 Int VGOFF_(sh_esi) = INVALID_OFFSET;
 Int VGOFF_(sh_edi) = INVALID_OFFSET;
 Int VGOFF_(sh_eflags) = INVALID_OFFSET;
+
 Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_div_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET;
@@ -92,25 +91,25 @@
 Int VGOFF_(helper_SAHF) = INVALID_OFFSET;
 Int VGOFF_(helper_DAS) = INVALID_OFFSET;
 Int VGOFF_(helper_DAA) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET;
 Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET;
-Int VGOFF_(fpu_write_check) = INVALID_OFFSET;
-Int VGOFF_(fpu_read_check) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_non_mem_instr) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_mem_instr)     = INVALID_OFFSET;
+
+/* MAX_NONCOMPACT_HELPERS can be increased easily.  If MAX_COMPACT_HELPERS is
+ * increased too much, they won't really be compact any more... */
+#define  MAX_COMPACT_HELPERS     8
+#define  MAX_NONCOMPACT_HELPERS  8 
+
+UInt VG_(n_compact_helpers)    = 0;
+UInt VG_(n_noncompact_helpers) = 0;
+
+Addr VG_(compact_helper_addrs)  [MAX_COMPACT_HELPERS];
+Int  VG_(compact_helper_offsets)[MAX_COMPACT_HELPERS];
+Addr VG_(noncompact_helper_addrs)  [MAX_NONCOMPACT_HELPERS];
+Int  VG_(noncompact_helper_offsets)[MAX_NONCOMPACT_HELPERS];
 
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
 
+
 /* Words. */
 static Int baB_off = 0;
 
@@ -133,6 +132,41 @@
    return off;
 }
 
+/* Registers a function in compact_helper_addrs;  compact_helper_offsets is
+ * filled in later.
+ */
+void VG_(register_compact_helper)(Addr a)
+{
+   if (MAX_COMPACT_HELPERS <= VG_(n_compact_helpers)) {
+      VG_(printf)("Can only register %d compact helpers\n", 
+                  MAX_COMPACT_HELPERS);
+      VG_(panic)("Too many compact helpers registered");
+   }
+   VG_(compact_helper_addrs)[VG_(n_compact_helpers)] = a;
+   VG_(n_compact_helpers)++;
+}
+
+/* Registers a function in noncompact_helper_addrs;  noncompact_helper_offsets
+ * is filled in later.
+ */
+void VG_(register_noncompact_helper)(Addr a)
+{
+   if (MAX_NONCOMPACT_HELPERS <= VG_(n_noncompact_helpers)) {
+      VG_(printf)("Can only register %d non-compact helpers\n", 
+                  MAX_NONCOMPACT_HELPERS);
+      VG_(printf)("Try increasing MAX_NONCOMPACT_HELPERS\n");
+      VG_(panic)("Too many non-compact helpers registered");
+   }
+   VG_(noncompact_helper_addrs)[VG_(n_noncompact_helpers)] = a;
+   VG_(n_noncompact_helpers)++;
+}
+
+/* Allocate offsets in baseBlock for the skin helpers */
+static void assign_helpers_in_baseBlock(UInt n, Int offsets[], Addr addrs[])
+{
+   Int i;
+   for (i = 0; i < n; i++) offsets[i] = alloc_BaB_1_set( addrs[i] );
+}
 
 /* Here we assign actual offsets.  It's important to get the most
    popular referents within 128 bytes of the start, so we can take
@@ -143,8 +177,6 @@
 
 static void vg_init_baseBlock ( void )
 {
-   baB_off = 0;
-
    /* Those with offsets under 128 are carefully chosen. */
 
    /* WORD offsets in this column */
@@ -158,82 +190,42 @@
    /* 7   */ VGOFF_(m_edi)     = alloc_BaB(1);
    /* 8   */ VGOFF_(m_eflags)  = alloc_BaB(1);
 
-   /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
-   /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
-   /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
-   /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
-   /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
-   /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
-   /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
-   /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
-   /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   if (VG_(needs).shadow_regs) {
+      /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
+      /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
+      /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
+      /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
+      /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
+      /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
+      /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
+      /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
+      /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   }
 
-   /* 17a */ 
-   VGOFF_(cachesim_log_non_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_non_mem_instr) );
-   /* 17b */ 
-   VGOFF_(cachesim_log_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_mem_instr) );
+   /* 9,10,11 or 18,19,20... depends on whether shadow regs are used
+    * and on the number of compact helpers registered */ 
 
-   /* 18  */ 
-   VGOFF_(helper_value_check4_fail) 
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) );
-   /* 19 */
-   VGOFF_(helper_value_check0_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) );
+   /* (9 or 18) + n_compact_helpers  */
+   /* Register VG_(handle_esp_assignment) if needed. */
+   if (VG_(track_events).new_mem_stack_aligned || 
+       VG_(track_events).die_mem_stack_aligned) 
+      VG_(register_compact_helper)( (Addr) & VG_(handle_esp_assignment) );
 
-   /* 20  */
-   VGOFF_(helperc_STOREV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) );
-   /* 21  */
-   VGOFF_(helperc_STOREV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) );
+   /* Allocate slots for compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_compact_helpers), 
+                               VG_(compact_helper_offsets), 
+                               VG_(compact_helper_addrs));
 
-   /* 22  */
-   VGOFF_(helperc_LOADV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) );
-   /* 23  */
-   VGOFF_(helperc_LOADV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) );
-
-   /* 24  */
-   VGOFF_(handle_esp_assignment)
-      = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) );
-
-   /* 25 */
+   /* (9/10 or 18/19) + n_compact_helpers */
    VGOFF_(m_eip) = alloc_BaB(1);
 
    /* There are currently 24 spill slots */
-   /* 26 .. 49  This overlaps the magic boundary at >= 32 words, but
-      most spills are to low numbered spill slots, so the ones above
-      the boundary don't see much action. */
+   /* (11+/20+ .. 32+/43+) + n_compact_helpers.  This can overlap the magic
+    * boundary at >= 32 words, but most spills are to low numbered spill
+    * slots, so the ones above the boundary don't see much action. */
    VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS);
 
-   /* These two pushed beyond the boundary because 2-byte transactions
-      are rare. */
-   /* 50  */
-   VGOFF_(helperc_STOREV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) );
-   /* 51  */
-   VGOFF_(helperc_LOADV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) );
-
-   /* 52  */
-   VGOFF_(fpu_write_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) );
-   /* 53  */
-   VGOFF_(fpu_read_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) );
-
-   /* Actually I don't think these two are ever used. */
-   /* 54  */ 
-   VGOFF_(helper_value_check2_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) );
-   /* 55  */ 
-   VGOFF_(helper_value_check1_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) );
-
-   /* I gave up counting at this point.  Since they're way above the
+   /* I gave up counting at this point.  Since they're above the
       short-amode-boundary, there's no point. */
 
    VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
@@ -303,6 +295,31 @@
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAS) );
    VGOFF_(helper_DAA)
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAA) );
+
+   /* Allocate slots for non-compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_noncompact_helpers), 
+                               VG_(noncompact_helper_offsets), 
+                               VG_(noncompact_helper_addrs));
+}
+
+static void vg_init_shadow_regs ( void )
+{
+   if (VG_(needs).shadow_regs) {
+      UInt eflags;
+   
+      SK_(written_shadow_regs_values) ( & VG_(written_shadow_reg), & eflags );
+      VG_(baseBlock)[VGOFF_(sh_esp)]    = 
+      VG_(baseBlock)[VGOFF_(sh_ebp)]    =
+      VG_(baseBlock)[VGOFF_(sh_eax)]    =
+      VG_(baseBlock)[VGOFF_(sh_ecx)]    =
+      VG_(baseBlock)[VGOFF_(sh_edx)]    =
+      VG_(baseBlock)[VGOFF_(sh_ebx)]    =
+      VG_(baseBlock)[VGOFF_(sh_esi)]    =
+      VG_(baseBlock)[VGOFF_(sh_edi)]    = VG_(written_shadow_reg);
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = eflags;
+
+   } else
+      VG_(written_shadow_reg) = VG_UNUSED_SHADOW_REG_VALUE;
 }
 
 
@@ -330,15 +347,17 @@
 /* 64-bit counter for the number of bbs to go before a debug exit. */
 ULong VG_(bbs_to_go);
 
-/* Produce debugging output? */
-Bool VG_(disassemble) = False;
-
 /* The current LRU epoch. */
 UInt VG_(current_epoch) = 0;
 
 /* This is the ThreadId of the last thread the scheduler ran. */
 ThreadId VG_(last_run_tid) = 0;
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+UInt VG_(exitcode) = 0;
+
 
 /* ---------------------------------------------------------------------
    Counters, for informational purposes only.
@@ -396,46 +415,111 @@
 
 
 /* ---------------------------------------------------------------------
+   Skin data structure initialisation
+   ------------------------------------------------------------------ */
+
+/* Init with default values. */
+VgNeeds VG_(needs) = {
+   .name                    = NULL,
+   .description             = NULL,
+
+   .core_errors             = False,
+   .skin_errors             = False,
+   .run_libc_freeres        = False,
+
+   .sizeof_shadow_block     = 0,
+
+   .basic_block_discards    = False,
+   .shadow_regs             = False,
+   .command_line_options    = False,
+   .client_requests         = False,
+   .extended_UCode          = False,
+   .syscall_wrapper         = False,
+   .alternative_free        = False,
+   .sanity_checks           = False,
+};
+
+VgTrackEvents VG_(track_events) = {
+   /* Memory events */
+   .new_mem_startup       = NULL,
+   .new_mem_heap          = NULL,
+   .new_mem_stack         = NULL,
+   .new_mem_stack_aligned = NULL,
+   .new_mem_stack_signal  = NULL,
+   .new_mem_brk           = NULL,
+   .new_mem_mmap          = NULL,
+
+   .copy_mem_heap         = NULL,
+   .change_mem_mprotect   = NULL,
+
+   .ban_mem_heap          = NULL,
+   .ban_mem_stack         = NULL,
+
+   .die_mem_heap          = NULL,
+   .die_mem_stack         = NULL,
+   .die_mem_stack_aligned = NULL,
+   .die_mem_stack_signal  = NULL,
+   .die_mem_brk           = NULL,
+   .die_mem_munmap        = NULL,
+
+   .bad_free              = NULL,
+   .mismatched_free       = NULL,
+
+   .pre_mem_read          = NULL,
+   .pre_mem_read_asciiz   = NULL,
+   .pre_mem_write         = NULL,
+   .post_mem_write        = NULL,
+
+   /* Mutex events */
+   .post_mutex_lock       = NULL,
+   .post_mutex_unlock     = NULL,
+};
+
+static void sanity_check_needs ( void )
+{
+#define CHECK_NOT(var, value)                                     \
+   if ((var)==(value)) {                                          \
+      VG_(printf)("\n`%s' not initialised\n", VG__STRING(var));   \
+      VG_(skin_error)("Uninitialised needs field\n");             \
+   }
+   
+   CHECK_NOT(VG_(needs).name,        NULL);
+   CHECK_NOT(VG_(needs).description, NULL);
+
+#undef CHECK_NOT
+#undef INVALID_Bool
+}
+
+/* ---------------------------------------------------------------------
    Values derived from command-line options.
    ------------------------------------------------------------------ */
 
-Bool   VG_(clo_error_limit);
-Bool   VG_(clo_check_addrVs);
-Bool   VG_(clo_GDB_attach);
-Int    VG_(sanity_level);
-Int    VG_(clo_verbosity);
-Bool   VG_(clo_demangle);
-Bool   VG_(clo_leak_check);
-Bool   VG_(clo_show_reachable);
-Int    VG_(clo_leak_resolution);
-Bool   VG_(clo_sloppy_malloc);
-Int    VG_(clo_alignment);
-Bool   VG_(clo_partial_loads_ok);
-Bool   VG_(clo_trace_children);
-Int    VG_(clo_logfile_fd);
-Int    VG_(clo_freelist_vol);
-Bool   VG_(clo_workaround_gcc296_bugs);
-Int    VG_(clo_n_suppressions);
+/* Define, and set defaults. */
+Bool   VG_(clo_error_limit)    = True;
+Bool   VG_(clo_GDB_attach)     = False;
+Int    VG_(sanity_level)       = 1;
+Int    VG_(clo_verbosity)      = 1;
+Bool   VG_(clo_demangle)       = True;
+Bool   VG_(clo_sloppy_malloc)  = False;
+Int    VG_(clo_alignment)      = 4;
+Bool   VG_(clo_trace_children) = False;
+Int    VG_(clo_logfile_fd)     = 2;
+Int    VG_(clo_n_suppressions) = 0;
 Char*  VG_(clo_suppressions)[VG_CLO_MAX_SFILES];
-Bool   VG_(clo_single_step);
-Bool   VG_(clo_optimise);
-Bool   VG_(clo_instrument);
-Bool   VG_(clo_cleanup);
-Bool   VG_(clo_cachesim);
-cache_t VG_(clo_I1_cache);
-cache_t VG_(clo_D1_cache);
-cache_t VG_(clo_L2_cache);
-Int    VG_(clo_smc_check);
-Bool   VG_(clo_trace_syscalls);
-Bool   VG_(clo_trace_signals);
-Bool   VG_(clo_trace_symtab);
-Bool   VG_(clo_trace_malloc);
-Bool   VG_(clo_trace_sched);
-Int    VG_(clo_trace_pthread_level);
-ULong  VG_(clo_stop_after);
-Int    VG_(clo_dump_error);
-Int    VG_(clo_backtrace_size);
-Char*  VG_(clo_weird_hacks);
+Bool   VG_(clo_profile)        = False;
+Bool   VG_(clo_single_step)    = False;
+Bool   VG_(clo_optimise)       = True;
+UChar  VG_(clo_trace_codegen)  = 0; // 00000000b
+Bool   VG_(clo_trace_syscalls) = False;
+Bool   VG_(clo_trace_signals)  = False;
+Bool   VG_(clo_trace_symtab)   = False;
+Bool   VG_(clo_trace_malloc)   = False;
+Bool   VG_(clo_trace_sched)    = False;
+Int    VG_(clo_trace_pthread_level) = 0;
+ULong  VG_(clo_stop_after)     = 1000000000000LL;
+Int    VG_(clo_dump_error)     = 0;
+Int    VG_(clo_backtrace_size) = 4;
+Char*  VG_(clo_weird_hacks)    = NULL;
 
 /* This Bool is needed by wrappers in vg_clientmalloc.c to decide how
    to behave.  Initially we say False. */
@@ -454,12 +538,11 @@
    don't have to modify the original. */
 static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN];
 
-
 /* ---------------------------------------------------------------------
    Processing of command-line options.
    ------------------------------------------------------------------ */
 
-static void bad_option ( Char* opt )
+void VG_(bad_option) ( Char* opt )
 {
    VG_(shutdown_logging)();
    VG_(clo_logfile_fd) = 2; /* stderr */
@@ -487,91 +570,85 @@
    config_error("couldn't find client's argc/argc/envp");
 }   
 
-static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+static void usage ( void )
 {
-   int   i1, i2, i3;
-   int   i;
-   char *opt = VG_(strdup)(VG_AR_PRIVATE, orig_opt);
+   Char* usage1 = 
+"usage: valgrind [options] prog-and-args\n"
+"\n"
+"  core user options, with defaults in [ ], are:\n"
+"    --help                    show this message\n"
+"    --version                 show version\n"
+"    --skin=<name>             main task (skin to use) [Valgrind]\n"
+"    -q --quiet                run silently; only print error msgs\n"
+"    -v --verbose              be more verbose, incl counts of errors\n"
+"    --gdb-attach=no|yes       start GDB when errors detected? [no]\n"
+"    --demangle=no|yes         automatically demangle C++ names? [yes]\n"
+"    --num-callers=<number>    show <num> callers in stack traces [4]\n"
+"    --error-limit=no|yes      stop showing new errors if too many? [yes]\n"
+"    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]\n"
+"    --alignment=<number>      set minimum alignment of allocations [4]\n"
+"    --trace-children=no|yes   Valgrind-ise child processes? [no]\n"
+"    --logfile-fd=<number>     file descriptor for messages [2=stderr]\n"
+"    --suppressions=<filename> suppress errors described in\n"
+"                              suppressions file <filename>\n"
+"    --weird-hacks=hack1,hack2,...  [no hacks selected]\n"
+"         recognised hacks are: ioctl-VTIME truncate-writes\n"
+"\n"
+"  %s skin user options:\n";
 
-   i = i1 = opt_len;
 
-   /* Option looks like "--I1=65536,2,64".
-    * Find commas, replace with NULs to make three independent 
-    * strings, then extract numbers.  Yuck. */
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i2 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i3 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if ('\0' != opt[i]) goto bad;
+   Char* usage2 = 
+"\n"
+"  core options for debugging Valgrind itself are:\n"
+"    --sanity-level=<number>   level of sanity checking to do [1]\n"
+"    --single-step=no|yes      translate each instr separately? [no]\n"
+"    --optimise=no|yes         improve intermediate code? [yes]\n"
+"    --profile=no|yes          profile? (skin must be built for it) [no]\n"
+"    --trace-codegen=<XXXXX>   show generated code? (X = 0|1) [00000]\n"
+"    --trace-syscalls=no|yes   show all system calls? [no]\n"
+"    --trace-signals=no|yes    show signal handling details? [no]\n"
+"    --trace-symtab=no|yes     show symbol table details? [no]\n"
+"    --trace-malloc=no|yes     show client malloc details? [no]\n"
+"    --trace-sched=no|yes      show thread scheduler details? [no]\n"
+"    --trace-pthread=none|some|all  show pthread event details? [no]\n"
+"    --stop-after=<number>     switch to real CPU after executing\n"
+"                              <number> basic blocks [infinity]\n"
+"    --dump-error=<number>     show translation for basic block\n"
+"                              associated with <number>'th\n"
+"                              error context [0=don't show any]\n"
+"\n"
+"  Extra options are read from env variable $VALGRIND_OPTS\n"
+"\n"
+"  Valgrind is Copyright (C) 2000-2002 Julian Seward\n"
+"  and licensed under the GNU General Public License, version 2.\n"
+"  Bug reports, feedback, admiration, abuse, etc, to: %s.\n"
+"\n";
 
-   cache->size      = (Int)VG_(atoll)(opt + i1);
-   cache->assoc     = (Int)VG_(atoll)(opt + i2);
-   cache->line_size = (Int)VG_(atoll)(opt + i3);
+   VG_(printf)(usage1, VG_(needs).name);
+   /* Don't print skin string directly for security, ha! */
+   if (VG_(needs).command_line_options)
+      VG_(printf)("%s", SK_(usage)());
+   else
+      VG_(printf)("    (none)\n");
+   VG_(printf)(usage2, VG_EMAIL_ADDR);
 
-   VG_(free)(VG_AR_PRIVATE, opt);
-   return;
-
-  bad:    
-   bad_option(orig_opt);
+   VG_(shutdown_logging)();
+   VG_(clo_logfile_fd) = 2; /* stderr */
+   VG_(exit)(1);
 }
 
 static void process_cmd_line_options ( void )
 {
-   UChar* argv[M_VG_CMDLINE_OPTS];
-   UInt   argc;
-   UChar* p;
-   UChar* str;
-   Int    i, eventually_logfile_fd, ctr;
+   Char* argv[M_VG_CMDLINE_OPTS];
+   UInt  argc;
+   Char* p;
+   Char* str;
+   Int   i, eventually_logfile_fd, ctr;
 
 #  define ISSPACE(cc)      ((cc) == ' ' || (cc) == '\t' || (cc) == '\n')
 #  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
 #  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
 
-   /* Set defaults. */
-   VG_(clo_error_limit)      = True;
-   VG_(clo_check_addrVs)     = True;
-   VG_(clo_GDB_attach)       = False;
-   VG_(sanity_level)         = 1;
-   VG_(clo_verbosity)        = 1;
-   VG_(clo_demangle)         = True;
-   VG_(clo_leak_check)       = False;
-   VG_(clo_show_reachable)   = False;
-   VG_(clo_leak_resolution)  = 2;
-   VG_(clo_sloppy_malloc)    = False;
-   VG_(clo_alignment)        = 4;
-   VG_(clo_partial_loads_ok) = True;
-   VG_(clo_trace_children)   = False;
-   VG_(clo_logfile_fd)       = 2; /* stderr */
-   VG_(clo_freelist_vol)     = 1000000;
-   VG_(clo_workaround_gcc296_bugs) = False;
-   VG_(clo_n_suppressions)   = 0;
-   VG_(clo_single_step)      = False;
-   VG_(clo_optimise)         = True;
-   VG_(clo_instrument)       = True;
-   VG_(clo_cachesim)         = False;
-   VG_(clo_I1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_D1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_L2_cache)         = UNDEFINED_CACHE;
-   VG_(clo_cleanup)          = True;
-   VG_(clo_smc_check)        = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE;
-   VG_(clo_trace_syscalls)   = False;
-   VG_(clo_trace_signals)    = False;
-   VG_(clo_trace_symtab)     = False;
-   VG_(clo_trace_malloc)     = False;
-   VG_(clo_trace_sched)      = False;
-   VG_(clo_trace_pthread_level) = 0;
-   VG_(clo_stop_after)       = 1000000000000LL;
-   VG_(clo_dump_error)       = 0;
-   VG_(clo_backtrace_size)   = 4;
-   VG_(clo_weird_hacks)      = NULL;
-
    eventually_logfile_fd = VG_(clo_logfile_fd);
 
    /* Once logging is started, we can safely send messages pertaining
@@ -603,7 +680,10 @@
        if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
                                   VG_STARTUP_STACK_BASE_3 )) {
           sp = (UInt*)VG_STARTUP_STACK_BASE_3;
- 
+       } else 
+       if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
+                                  VG_STARTUP_STACK_BASE_4 )) {
+          sp = (UInt*)VG_STARTUP_STACK_BASE_4;
        } else {
           args_grok_error(
              "startup %esp is not near any VG_STARTUP_STACK_BASE_*\n   "
@@ -723,7 +803,7 @@
 
    for (i = 0; i < argc; i++) {
 
-      if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
+      if      (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
          VG_(clo_verbosity)++;
       else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet"))
          VG_(clo_verbosity)--;
@@ -733,11 +813,6 @@
       else if (STREQ(argv[i], "--error-limit=no"))
          VG_(clo_error_limit) = False;
 
-      else if (STREQ(argv[i], "--check-addrVs=yes"))
-         VG_(clo_check_addrVs) = True;
-      else if (STREQ(argv[i], "--check-addrVs=no"))
-         VG_(clo_check_addrVs) = False;
-
       else if (STREQ(argv[i], "--gdb-attach=yes"))
          VG_(clo_GDB_attach) = True;
       else if (STREQ(argv[i], "--gdb-attach=no"))
@@ -748,28 +823,6 @@
       else if (STREQ(argv[i], "--demangle=no"))
          VG_(clo_demangle) = False;
 
-      else if (STREQ(argv[i], "--partial-loads-ok=yes"))
-         VG_(clo_partial_loads_ok) = True;
-      else if (STREQ(argv[i], "--partial-loads-ok=no"))
-         VG_(clo_partial_loads_ok) = False;
-
-      else if (STREQ(argv[i], "--leak-check=yes"))
-         VG_(clo_leak_check) = True;
-      else if (STREQ(argv[i], "--leak-check=no"))
-         VG_(clo_leak_check) = False;
-
-      else if (STREQ(argv[i], "--show-reachable=yes"))
-         VG_(clo_show_reachable) = True;
-      else if (STREQ(argv[i], "--show-reachable=no"))
-         VG_(clo_show_reachable) = False;
-
-      else if (STREQ(argv[i], "--leak-resolution=low"))
-         VG_(clo_leak_resolution) = 2;
-      else if (STREQ(argv[i], "--leak-resolution=med"))
-         VG_(clo_leak_resolution) = 4;
-      else if (STREQ(argv[i], "--leak-resolution=high"))
-         VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE;
-
       else if (STREQ(argv[i], "--sloppy-malloc=yes"))
          VG_(clo_sloppy_malloc) = True;
       else if (STREQ(argv[i], "--sloppy-malloc=no"))
@@ -783,32 +836,27 @@
       else if (STREQ(argv[i], "--trace-children=no"))
          VG_(clo_trace_children) = False;
 
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes"))
-         VG_(clo_workaround_gcc296_bugs) = True;
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=no"))
-         VG_(clo_workaround_gcc296_bugs) = False;
-
       else if (STREQN(15, argv[i], "--sanity-level="))
          VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]);
 
       else if (STREQN(13, argv[i], "--logfile-fd="))
          eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]);
 
-      else if (STREQN(15, argv[i], "--freelist-vol=")) {
-         VG_(clo_freelist_vol) = (Int)VG_(atoll)(&argv[i][15]);
-         if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2;
-      }
-
       else if (STREQN(15, argv[i], "--suppressions=")) {
          if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) {
-            VG_(message)(Vg_UserMsg, "Too many logfiles specified.");
+            VG_(message)(Vg_UserMsg, "Too many suppression files specified.");
             VG_(message)(Vg_UserMsg, 
                          "Increase VG_CLO_MAX_SFILES and recompile.");
-            bad_option(argv[i]);
+            VG_(bad_option)(argv[i]);
          }
          VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15];
          VG_(clo_n_suppressions)++;
       }
+      else if (STREQ(argv[i], "--profile=yes"))
+         VG_(clo_profile) = True;
+      else if (STREQ(argv[i], "--profile=no"))
+         VG_(clo_profile) = False;
+
       else if (STREQ(argv[i], "--single-step=yes"))
          VG_(clo_single_step) = True;
       else if (STREQ(argv[i], "--single-step=no"))
@@ -819,35 +867,26 @@
       else if (STREQ(argv[i], "--optimise=no"))
          VG_(clo_optimise) = False;
 
-      else if (STREQ(argv[i], "--instrument=yes"))
-         VG_(clo_instrument) = True;
-      else if (STREQ(argv[i], "--instrument=no"))
-         VG_(clo_instrument) = False;
-
-      else if (STREQ(argv[i], "--cleanup=yes"))
-         VG_(clo_cleanup) = True;
-      else if (STREQ(argv[i], "--cleanup=no"))
-         VG_(clo_cleanup) = False;
-
-      else if (STREQ(argv[i], "--cachesim=yes"))
-         VG_(clo_cachesim) = True;     
-      else if (STREQ(argv[i], "--cachesim=no"))
-         VG_(clo_cachesim) = False;
-
-      /* 5 is length of "--I1=" */
-      else if (0 == VG_(strncmp)(argv[i], "--I1=",    5))
-         parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--D1=",    5))
-         parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--L2=",    5))
-         parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5);
-
-      else if (STREQ(argv[i], "--smc-check=none"))
-         VG_(clo_smc_check) = VG_CLO_SMC_NONE;
-      else if (STREQ(argv[i], "--smc-check=some"))
-         VG_(clo_smc_check) = VG_CLO_SMC_SOME;
-      else if (STREQ(argv[i], "--smc-check=all"))
-         VG_(clo_smc_check) = VG_CLO_SMC_ALL;
+      /* "vwxyz" --> 000zyxwv (binary) */
+      else if (STREQN(16, argv[i], "--trace-codegen=")) {
+         Int j;
+         char* opt = & argv[i][16];
+   
+         if (5 != VG_(strlen)(opt)) {
+            VG_(message)(Vg_UserMsg, 
+                         "--trace-codegen argument must have 5 digits");
+            VG_(bad_option)(argv[i]);
+         }
+         for (j = 0; j < 5; j++) {
+            if      ('0' == opt[j]) { /* do nothing */ }
+            else if ('1' == opt[j]) VG_(clo_trace_codegen) |= (1 << j);
+            else {
+               VG_(message)(Vg_UserMsg, "--trace-codegen argument can only "
+                                        "contain 0s and 1s");
+               VG_(bad_option)(argv[i]);
+            }
+         }
+      }
 
       else if (STREQ(argv[i], "--trace-syscalls=yes"))
          VG_(clo_trace_syscalls) = True;
@@ -899,8 +938,13 @@
             VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE;
       }
 
+      else if (VG_(needs).command_line_options) {
+         Bool ok = SK_(process_cmd_line_option)(argv[i]);
+         if (!ok)
+            usage();
+      }
       else
-         bad_option(argv[i]);
+         usage();
    }
 
 #  undef ISSPACE
@@ -917,7 +961,7 @@
       VG_(message)(Vg_UserMsg, 
          "Invalid --alignment= setting.  "
          "Should be a power of 2, >= 4, <= 4096.");
-      bad_option("--alignment");
+      VG_(bad_option)("--alignment");
    }
 
    if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) {
@@ -926,26 +970,14 @@
          "--gdb-attach=yes conflicts with --trace-children=yes");
       VG_(message)(Vg_UserMsg, 
          "Please choose one or the other, but not both.");
-      bad_option("--gdb-attach=yes and --trace-children=yes");
+      VG_(bad_option)("--gdb-attach=yes and --trace-children=yes");
    }
 
    VG_(clo_logfile_fd) = eventually_logfile_fd;
 
-   /* Don't do memory checking if simulating the cache. */
-   if (VG_(clo_cachesim)) {
-       VG_(clo_instrument) = False;
-   }
-
    if (VG_(clo_verbosity > 0)) {
-      if (VG_(clo_cachesim)) {
-         VG_(message)(Vg_UserMsg, 
-            "cachegrind-%s, an I1/D1/L2 cache profiler for x86 GNU/Linux.",
-            VERSION);
-      } else {
-         VG_(message)(Vg_UserMsg, 
-            "valgrind-%s, a memory error detector for x86 GNU/Linux.",
-            VERSION);
-      }
+      VG_(message)(Vg_UserMsg, "%s-%s, %s for x86 GNU/Linux.",
+         VG_(needs).name, VERSION, VG_(needs).description);
    }
 
    if (VG_(clo_verbosity > 0))
@@ -958,12 +990,12 @@
       }
    }
 
-   if (VG_(clo_n_suppressions) == 0 && !VG_(clo_cachesim)) {
+   if (VG_(clo_n_suppressions) == 0 && 
+       (VG_(needs).core_errors || VG_(needs).skin_errors)) {
       config_error("No error-suppression files were specified.");
    }
 }
 
-
 /* ---------------------------------------------------------------------
    Copying to/from m_state_static.
    ------------------------------------------------------------------ */
@@ -1015,11 +1047,40 @@
          = VG_(m_state_static)[40/4 + i];
 }
 
+Addr VG_(get_stack_pointer) ( void )
+{
+   return VG_(baseBlock)[VGOFF_(m_esp)];
+}
+
+/* Some random tests needed for leak checking */
+
+Bool VG_(within_stack)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(stack)))
+       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack)))
+      return True;
+   else
+      return False;
+}
+
+Bool VG_(within_m_state_static)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(m_state_static)))
+       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static)))
+      return True;
+   else
+      return False;
+}
 
 /* ---------------------------------------------------------------------
    Show accumulated counts.
    ------------------------------------------------------------------ */
 
+static __inline__ Int safe_idiv(Int a, Int b)
+{
+   return (b == 0 ? 0 : a / b);
+}
+
 static void vg_show_counts ( void )
 {
    VG_(message)(Vg_DebugMsg,
@@ -1027,13 +1088,17 @@
 		VG_(current_epoch),
                 VG_(number_of_lrus) );
    VG_(message)(Vg_DebugMsg,
-                "translate: new %d (%d -> %d), discard %d (%d -> %d).",
+                "translate: new     %d (%d -> %d; ratio %d:10)",
                 VG_(overall_in_count),
                 VG_(overall_in_osize),
                 VG_(overall_in_tsize),
+                safe_idiv(10*VG_(overall_in_tsize), VG_(overall_in_osize)));
+   VG_(message)(Vg_DebugMsg,
+                "           discard %d (%d -> %d; ratio %d:10).",
                 VG_(overall_out_count),
                 VG_(overall_out_osize),
-                VG_(overall_out_tsize) );
+                VG_(overall_out_tsize),
+                safe_idiv(10*VG_(overall_out_tsize), VG_(overall_out_osize)));
    VG_(message)(Vg_DebugMsg,
       " dispatch: %lu basic blocks, %d/%d sched events, %d tt_fast misses.", 
       VG_(bbs_done), VG_(num_scheduling_events_MAJOR), 
@@ -1050,6 +1115,7 @@
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
                 VG_(sanity_slow_count) );
+   VG_(print_ccall_stats)();
 }
 
 
@@ -1072,21 +1138,32 @@
       VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321;
    }
 
-   /* Set up baseBlock offsets and copy the saved machine's state into
-      it. */
+   /* Setup stuff that depends on the skin.  Must be before:
+      - vg_init_baseBlock(): to register helpers
+      - process_cmd_line_options(): to register skin name and description,
+        and turn on/off 'command_line_options' need
+      - init_memory() (to setup memory event trackers).
+    */
+   SK_(pre_clo_init) ( & VG_(needs), & VG_(track_events) );
+   sanity_check_needs();
+
+   /* Set up baseBlock offsets and copy the saved machine's state into it. */
    vg_init_baseBlock();
    VG_(copy_m_state_static_to_baseBlock)();
+   vg_init_shadow_regs();
 
    /* Process Valgrind's command-line opts (from env var VG_OPTS). */
    process_cmd_line_options();
 
    /* Hook to delay things long enough so we can get the pid and
       attach GDB in another shell. */
-   if (0) { 
+#if 0
+   { 
       Int p, q;
       for (p = 0; p < 50000; p++)
          for (q = 0; q < 50000; q++) ;
    }
+#endif
 
    /* Initialise the scheduler, and copy the client's state from
       baseBlock into VG_(threads)[1].  This has to come before signal
@@ -1098,31 +1175,34 @@
    VG_(sigstartup_actions)();
 
    /* Perhaps we're profiling Valgrind? */
-#  ifdef VG_PROFILE
-   VGP_(init_profiling)();
-#  endif
+   if (VG_(clo_profile))
+      VGP_(init_profiling)();
 
    /* Start calibration of our RDTSC-based clock. */
    VG_(start_rdtsc_calibration)();
 
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      VGP_PUSHCC(VgpInitAudit);
-      VGM_(init_memory_audit)();
-      VGP_POPCC;
-   }
+   /* Do this here just to give rdtsc calibration more time */
+   SK_(post_clo_init)();
 
-   VGP_PUSHCC(VgpReadSyms);
-   VG_(read_symbols)();
-   VGP_POPCC;
+   /* Must come after SK_(init) so memory handler accompaniments (eg.
+    * shadow memory) can be setup ok */
+   VGP_PUSHCC(VgpInitMem);
+   VG_(init_memory)();
+   VGP_POPCC(VgpInitMem);
+
+   /* Read the list of errors to suppress.  This should be found in
+      the file specified by vg_clo_suppressions. */
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
+      VG_(load_suppressions)();
 
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
 
-   /* This should come after init_memory_audit; otherwise the latter
-      carefully sets up the permissions maps to cover the anonymous
-      mmaps for the translation table and translation cache, which
-      wastes > 20M of virtual address space. */
+   /* This should come after init_memory_and_symbols(); otherwise the 
+      latter carefully sets up the permissions maps to cover the 
+      anonymous mmaps for the translation table and translation cache, 
+      which wastes > 20M of virtual address space. */
    VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
@@ -1132,26 +1212,18 @@
 
    /* Now it is safe for malloc et al in vg_clientmalloc.c to act
       instrumented-ly. */
-   VG_(running_on_simd_CPU) = True;
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
-   }
-
-   if (VG_(clo_cachesim)) 
-      VG_(init_cachesim)();
-
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
 
    VG_(bbs_to_go) = VG_(clo_stop_after);
 
+
    /* Run! */
+   VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
    src = VG_(scheduler)();
-   VGP_POPCC;
+   VGP_POPCC(VgpSched);
+   VG_(running_on_simd_CPU) = False;
 
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
@@ -1161,25 +1233,19 @@
         "Warning: pthread scheduler exited due to deadlock");
    }
 
-   if (VG_(clo_instrument)) {
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
       VG_(show_all_errors)();
-      VG_(clientmalloc_done)();
-      if (VG_(clo_verbosity) == 1) {
-         VG_(message)(Vg_UserMsg, 
-                      "For counts of detected errors, rerun with: -v");
-      }
-      if (VG_(clo_leak_check)) VG_(detect_memory_leaks)();
-   }
-   VG_(running_on_simd_CPU) = False;
 
-   if (VG_(clo_cachesim))
-      VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv));
+   SK_(fini)();
 
    VG_(do_sanity_checks)( True /*include expensive checks*/ );
 
    if (VG_(clo_verbosity) > 1)
       vg_show_counts();
 
+   if (VG_(clo_verbosity) > 2)
+      VG_(print_UInstr_histogram)();
+
    if (0) {
       VG_(message)(Vg_DebugMsg, "");
       VG_(message)(Vg_DebugMsg, 
@@ -1189,16 +1255,10 @@
       VG_(message)(Vg_DebugMsg, 
          "------ Valgrind's ExeContext management stats follow ------" );
       VG_(show_ExeContext_stats)();
-      VG_(message)(Vg_DebugMsg, 
-         "------ Valgrind's client block stats follow ---------------" );
-      VG_(show_client_block_stats)();
    }
  
-#  ifdef VG_PROFILE
-   VGP_(done_profiling)();
-#  endif
-
-   VG_(done_prof_mem)();
+   if (VG_(clo_profile))
+      VGP_(done_profiling)();
 
    VG_(shutdown_logging)();
 
@@ -1220,9 +1280,10 @@
                    && VG_(last_run_tid) < VG_N_THREADS);
          tst = & VG_(threads)[VG_(last_run_tid)];
          vg_assert(tst->status == VgTs_Runnable);
-         /* The thread's %EBX will hold the arg to exit(), so we just
-            do exit with that arg. */
-         VG_(exit)( tst->m_ebx );
+         /* The thread's %EBX at the time it did __NR_exit() will hold
+            the arg to __NR_exit(), so we just do __NR_exit() with
+            that arg. */
+         VG_(exit)( VG_(exitcode) );
          /* NOT ALIVE HERE! */
          VG_(panic)("entered the afterlife in vg_main() -- ExitSyscall");
          break; /* what the hell :) */
@@ -1267,6 +1328,10 @@
    tracing into child processes.  To make this work the build system
    also supplies a dummy file, "valgrinq.so". 
 
+   Also replace "vgskin_<foo>.so" with whitespace, for the same reason;
+   without it, child processes try to find valgrind.so symbols in the 
+   skin .so.
+
    Also look for $(libdir)/lib/valgrind in LD_LIBRARY_PATH and change
    it to $(libdir)/lib/valgrinq, so as to make our libpthread.so
    disappear.  
@@ -1274,20 +1339,22 @@
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str )
 {
-   Char* p_prel = NULL;
-   Char* p_path = NULL;
-   Int   what = 0;
+   Char* p_prel  = NULL;
+   Char* sk_prel = NULL;
+   Char* p_path  = NULL;
+   Int   what    = 0;
    if (ld_preload_str == NULL || ld_library_path_str == NULL)
       goto mutancy;
 
    /* VG_(printf)("%s %s\n", ld_preload_str, ld_library_path_str); */
 
    p_prel = VG_(strstr)(ld_preload_str, "valgrind.so");
+   sk_prel = VG_(strstr)(ld_preload_str, "vgskin_");
    p_path = VG_(strstr)(ld_library_path_str, VG_LIBDIR);
 
+   what = 1;
    if (p_prel == NULL) {
       /* perhaps already happened? */
-      what = 1;
       if (VG_(strstr)(ld_preload_str, "valgrinq.so") == NULL)
          goto mutancy;
       if (VG_(strstr)(ld_library_path_str, "lib/valgrinq") == NULL)
@@ -1296,10 +1363,30 @@
    }
 
    what = 2;
+   if (sk_prel == NULL) goto mutancy;
+
+   what = 3;
    if (p_path == NULL) goto mutancy;
 
+   what = 4;
+   {  
+      /* Blank from "vgskin_" back to prev. LD_PRELOAD entry, or start */
+      Char* p = sk_prel;
+      while (*p != ':' && p > ld_preload_str) { 
+         *p = ' ';
+         p--;
+      }
+      /* Blank from "vgskin_" to next LD_PRELOAD entry */
+      while (*p != ':' && *p != '\0') { 
+         *p = ' ';
+         p++;
+      }
+      if (*p == '\0') goto mutancy;    /* valgrind.so has disappeared?! */
+      *p = ' ';                        /* blank ending ':' */
+   }
+
    /* in LD_PRELOAD, turn valgrind.so into valgrinq.so. */
-   what = 3;
+   what = 5;
    if (p_prel[7] != 'd') goto mutancy;
    p_prel[7] = 'q';
 
@@ -1307,10 +1394,10 @@
       .../lib/valgrind .../lib/valgrinq, which doesn't exist,
       so that our own libpthread.so goes out of scope. */
    p_path += VG_(strlen)(VG_LIBDIR);
-   what = 4;
+   what = 6;
    if (p_path[0] != '/') goto mutancy;
    p_path++; /* step over / */
-   what = 5;
+   what = 7;
    if (p_path[7] != 'd') goto mutancy;
    p_path[7] = 'q';
    return;
@@ -1406,6 +1493,70 @@
 }
 
 
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* A fast sanity check -- suitable for calling circa once per
+   millisecond. */
+
+void VG_(do_sanity_checks) ( Bool force_expensive )
+{
+   Int          i;
+
+   if (VG_(sanity_level) < 1) return;
+
+   /* --- First do all the tests that we can do quickly. ---*/
+
+   VG_(sanity_fast_count)++;
+
+   /* Check that we haven't overrun our private stack. */
+   for (i = 0; i < 10; i++) {
+      vg_assert(VG_(stack)[i]
+                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
+      vg_assert(VG_(stack)[10000-1-i] 
+                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
+   }
+
+   /* Check stuff pertaining to the memory check system. */
+
+   /* Check that nobody has spuriously claimed that the first or
+      last 16 pages of memory have become accessible [...] */
+   if (VG_(needs).sanity_checks)
+      vg_assert(SK_(cheap_sanity_check)());
+
+   /* --- Now some more expensive checks. ---*/
+
+   /* Once every 25 times, check some more expensive stuff. */
+   if ( force_expensive
+     || VG_(sanity_level) > 1
+     || (VG_(sanity_level) == 1 && (VG_(sanity_fast_count) % 25) == 0)) {
+
+      VG_(sanity_slow_count)++;
+
+#     if 0
+      { void zzzmemscan(void); zzzmemscan(); }
+#     endif
+
+      if ((VG_(sanity_fast_count) % 250) == 0)
+         VG_(sanity_check_tc_tt)();
+
+      if (VG_(needs).sanity_checks) {
+          vg_assert(SK_(expensive_sanity_check)());
+      }
+      /* 
+      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
+      */
+   }
+
+   if (VG_(sanity_level) > 1) {
+      /* Check sanity of the low-level memory manager.  Note that bugs
+         in the client's code can cause this to fail, so we don't do
+         this check unless specially asked for.  And because it's
+         potentially very expensive. */
+      VG_(mallocSanityCheckAll)();
+   }
+}
 /*--------------------------------------------------------------------*/
 /*--- end                                                vg_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_malloc2.c b/coregrind/vg_malloc2.c
index 87f580d..92358c1 100644
--- a/coregrind/vg_malloc2.c
+++ b/coregrind/vg_malloc2.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -178,13 +178,14 @@
 /* It is important that this library is self-initialising, because it
    may get called very early on -- as a result of C++ static
    constructor initialisations -- before Valgrind itself is
-   initialised.  Hence vg_malloc() and vg_free() below always call
-   ensure_mm_init() to ensure things are correctly initialised.  */
+   initialised.  Hence VG_(arena_malloc)() and VG_(arena_free)() below always
+   call ensure_mm_init() to ensure things are correctly initialised.  */
 
 static
 void ensure_mm_init ( void )
 {
    static Bool init_done = False;
+
    if (init_done) return;
 
    /* Use a checked red zone size of 1 word for our internal stuff,
@@ -194,22 +195,28 @@
       which merely checks at the time of freeing that the red zone
       words are unchanged. */
 
-   arena_init ( &vg_arena[VG_AR_PRIVATE], "private ", 
+   arena_init ( &vg_arena[VG_AR_CORE],      "core    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_SYMTAB],  "symtab  ", 
+   arena_init ( &vg_arena[VG_AR_SKIN],      "skin    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_CLIENT],  "client  ",  
+   arena_init ( &vg_arena[VG_AR_SYMTAB],    "symtab  ", 
+                1, True, 262144 );
+
+   arena_init ( &vg_arena[VG_AR_JITTER],    "JITter  ", 
+                1, True, 8192 );
+
+   arena_init ( &vg_arena[VG_AR_CLIENT],    "client  ",  
                 VG_AR_CLIENT_REDZONE_SZW, False, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle",  
+   arena_init ( &vg_arena[VG_AR_DEMANGLE],  "demangle",  
                 4 /*paranoid*/, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_EXECTXT],  "exectxt ",  
+   arena_init ( &vg_arena[VG_AR_EXECTXT],   "exectxt ",  
                 1, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_ERRCTXT],  "errctxt ",  
+   arena_init ( &vg_arena[VG_AR_ERRORS],    "errors  ",  
                 1, True, 16384 );
 
    arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien",  
@@ -692,7 +699,7 @@
 
 
 /* Sanity check both the superblocks and the chains. */
-void VG_(mallocSanityCheckArena) ( ArenaId aid )
+static void mallocSanityCheckArena ( ArenaId aid )
 {
    Int         i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li;
    Int         blockctr_sb_free, listno, list_min_pszW, list_max_pszW;
@@ -703,7 +710,7 @@
    UInt        arena_bytes_on_loan;
    Arena*      a;
 
-#  define BOMB VG_(panic)("vg_mallocSanityCheckArena")
+#  define BOMB VG_(panic)("mallocSanityCheckArena")
 
    a = arenaId_to_ArenaP(aid);
    
@@ -722,15 +729,15 @@
          b     = &sb->payload_words[i];
          b_bszW = get_bszW_lo(b);
          if (!blockSane(a, b)) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "BAD\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        " BAD\n",
                          sb, i, b_bszW );
             BOMB;
          }
          thisFree = !is_inuse_bszW(b_bszW);
          if (thisFree && lastWasFree) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "UNMERGED FREES\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        "UNMERGED FREES\n",
                          sb, i, b_bszW );
             BOMB;
          }
@@ -741,7 +748,7 @@
          i += mk_plain_bszW(b_bszW);
       }
       if (i > sb->n_payload_words) {
-         VG_(printf)( "mallocSanityCheck: sb %p: last block "
+         VG_(printf)( "mallocSanityCheckArena: sb %p: last block "
                       "overshoots end\n", sb);
          BOMB;
       }
@@ -750,7 +757,7 @@
 
    if (arena_bytes_on_loan != a->bytes_on_loan) {
             VG_(printf)( 
-                    "mallocSanityCheck: a->bytes_on_loan %d, "
+                    "mallocSanityCheckArena: a->bytes_on_loan %d, "
                     "arena_bytes_on_loan %d: "
                     "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
       ppSuperblocks(a);
@@ -770,7 +777,7 @@
          b_prev = b;
          b = get_next_p(b);
          if (get_prev_p(b) != b_prev) {
-            VG_(printf)( "mallocSanityCheck: list %d at %p: "
+            VG_(printf)( "mallocSanityCheckArena: list %d at %p: "
                          "BAD LINKAGE\n", 
                          listno, b );
             BOMB;
@@ -778,7 +785,7 @@
          b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b)));
          if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) {
             VG_(printf)( 
-               "mallocSanityCheck: list %d at %p: "
+               "mallocSanityCheckArena: list %d at %p: "
                "WRONG CHAIN SIZE %d (%d, %d)\n", 
                listno, b, b_pszW, list_min_pszW, list_max_pszW );
             BOMB;
@@ -790,7 +797,7 @@
 
    if (blockctr_sb_free != blockctr_li) {
       VG_(printf)( 
-         "mallocSanityCheck: BLOCK COUNT MISMATCH "
+         "mallocSanityCheckArena: BLOCK COUNT MISMATCH "
          "(via sbs %d, via lists %d)\n",
          blockctr_sb_free, blockctr_li );
       ppSuperblocks(a);
@@ -813,7 +820,7 @@
 {
    Int i;
    for (i = 0; i < VG_N_ARENAS; i++)
-      VG_(mallocSanityCheckArena) ( i );
+      mallocSanityCheckArena ( i );
 }
 
 
@@ -828,6 +835,7 @@
    Superblock* sb;
    WordF*      b;
    Int         b_bszW;
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
    for (sb = a->sblocks; sb != NULL; sb = sb->next) {
@@ -845,10 +853,10 @@
 
 
 /*------------------------------------------------------------*/
-/*--- Externally-visible functions.                        ---*/
+/*--- Core-visible functions.                              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(malloc) ( ArenaId aid, Int req_pszB )
+void* VG_(arena_malloc) ( ArenaId aid, Int req_pszB )
 {
    Int         req_pszW, req_bszW, frag_bszW, b_bszW, lno;
    Superblock* new_sb;
@@ -943,15 +951,15 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
    return first_to_payload(a, b);
 }
 
  
-void VG_(free) ( ArenaId aid, void* ptr )
+void VG_(arena_free) ( ArenaId aid, void* ptr )
 {
    Superblock* sb;
    UInt*       sb_payl_firstw;
@@ -966,8 +974,11 @@
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
-   if (ptr == NULL) return;
-
+   if (ptr == NULL) {
+      VGP_POPCC(VgpMalloc);
+      return;
+   }
+      
    ch = payload_to_first(a, ptr);
 
 #  ifdef DEBUG_MALLOC
@@ -1026,10 +1037,10 @@
    }
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
 }
 
 
@@ -1065,13 +1076,15 @@
    .    .               .   .   .               .   .
 
 */
-void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
+void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
 {
    Int    req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW;
    Word   *base_b, *base_p, *align_p;
    UInt   saved_bytes_on_loan;
    Arena* a;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1091,7 +1104,7 @@
          break;
       default:
          VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", 
-                     a, req_pszB, req_alignB );
+                     a, req_alignB, req_pszB );
          VG_(panic)("vg_malloc_aligned");
          /*NOTREACHED*/
    }
@@ -1112,7 +1125,7 @@
    /* Payload ptr for the block we are going to split.  Note this
       changes a->bytes_on_loan; we save and restore it ourselves. */
    saved_bytes_on_loan = a->bytes_on_loan;
-   base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
+   base_p = VG_(arena_malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
    a->bytes_on_loan = saved_bytes_on_loan;
 
    /* Block ptr for the block we are going to split. */
@@ -1163,9 +1176,11 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
+   VGP_POPCC(VgpMalloc);
+
    return align_p;
 }
 
@@ -1174,25 +1189,34 @@
 /*--- Services layered on top of malloc/free.              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes )
+void* VG_(arena_calloc) ( ArenaId aid, Int nmemb, Int nbytes )
 {
    Int    i, size;
    UChar* p;
+
+   VGP_PUSHCC(VgpMalloc);
+
    size = nmemb * nbytes;
    vg_assert(size >= 0);
-   p = VG_(malloc) ( aid, size );
+   p = VG_(arena_malloc) ( aid, size );
    for (i = 0; i < size; i++) p[i] = 0;
+
+   VGP_POPCC(VgpMalloc);
+   
    return p;
 }
 
 
-void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB )
+void* VG_(arena_realloc) ( ArenaId aid, void* ptr, 
+                          Int req_alignB, Int req_pszB )
 {
    Arena* a;
    Int    old_bszW, old_pszW, old_pszB, i;
    UChar  *p_old, *p_new;
    UInt*  ch;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1208,19 +1232,60 @@
    old_pszW = bszW_to_pszW(a, old_bszW);
    old_pszB = old_pszW * VKI_BYTES_PER_WORD;
 
-   if (req_pszB <= old_pszB) return ptr;
+   if (req_pszB <= old_pszB) {
+      VGP_POPCC(VgpMalloc);
+      return ptr;
+   }
 
-   p_new = VG_(malloc) ( aid, req_pszB );
+   if (req_alignB == 4)
+      p_new = VG_(arena_malloc) ( aid, req_pszB );
+   else
+      p_new = VG_(arena_malloc_aligned) ( aid, req_alignB, req_pszB );
+
    p_old = (UChar*)ptr;
    for (i = 0; i < old_pszB; i++)
       p_new[i] = p_old[i];
 
-   VG_(free)(aid, p_old);
+   VG_(arena_free)(aid, p_old);
+
+   VGP_POPCC(VgpMalloc);
    return p_new;
 }
 
 
 /*------------------------------------------------------------*/
+/*--- Skin-visible functions.                              ---*/
+/*------------------------------------------------------------*/
+
+/* All just wrappers to avoid exposing arenas to skins */
+
+void* VG_(malloc) ( Int nbytes )
+{
+   return VG_(arena_malloc) ( VG_AR_SKIN, nbytes );
+}
+
+void  VG_(free) ( void* ptr )
+{
+   VG_(arena_free) ( VG_AR_SKIN, ptr );
+}
+
+void* VG_(calloc) ( Int nmemb, Int nbytes )
+{
+   return VG_(arena_calloc) ( VG_AR_SKIN, nmemb, nbytes );
+}
+
+void* VG_(realloc) ( void* ptr, Int size )
+{
+   return VG_(arena_realloc) ( VG_AR_SKIN, ptr, /*alignment*/4, size );
+}
+
+void* VG_(malloc_aligned) ( Int req_alignB, Int req_pszB )
+{
+   return VG_(arena_malloc_aligned) ( VG_AR_SKIN, req_alignB, req_pszB );
+}
+
+
+/*------------------------------------------------------------*/
 /*--- The original test driver machinery.                  ---*/
 /*------------------------------------------------------------*/
 
@@ -1243,7 +1308,7 @@
 {
    Int i, j, k, nbytes, qq;
    unsigned char* chp;
-   Arena* a = &arena[VG_AR_PRIVATE];
+   Arena* a = &arena[VG_AR_CORE];
    srandom(1);
    for (i = 0; i < N_TEST_ARR; i++)
       test_arr[i] = NULL;
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index eea79cb..5ea4246 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Maintain bitmaps of memory, tracking the accessibility (A)   ---*/
-/*--- and validity (V) status of each byte.                        ---*/
+/*--- Memory-related stuff: segment initialisation and tracking,   ---*/
+/*--- stack operations                                             ---*/
 /*---                                                  vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -27,1275 +27,208 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-/* Define to debug the mem audit system. */
-/* #define VG_DEBUG_MEMORY */
 
-/* Define to debug the memory-leak-detector. */
-/* #define VG_DEBUG_LEAKCHECK */
+/*--------------------------------------------------------------*/
+/*--- Initialise program data/text etc on program startup.   ---*/
+/*--------------------------------------------------------------*/
 
-/* Define to collect detailed performance info. */
-/* #define VG_PROFILE_MEMORY */
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level support for memory checking.               ---*/
-/*------------------------------------------------------------*/
-
-/* 
-   All reads and writes are checked against a memory map, which
-   records the state of all memory in the process.  The memory map is
-   organised like this:
-
-   The top 16 bits of an address are used to index into a top-level
-   map table, containing 65536 entries.  Each entry is a pointer to a
-   second-level map, which records the accesibililty and validity
-   permissions for the 65536 bytes indexed by the lower 16 bits of the
-   address.  Each byte is represented by nine bits, one indicating
-   accessibility, the other eight validity.  So each second-level map
-   contains 73728 bytes.  This two-level arrangement conveniently
-   divides the 4G address space into 64k lumps, each size 64k bytes.
-
-   All entries in the primary (top-level) map must point to a valid
-   secondary (second-level) map.  Since most of the 4G of address
-   space will not be in use -- ie, not mapped at all -- there is a
-   distinguished secondary map, which indicates `not addressible and
-   not valid' writeable for all bytes.  Entries in the primary map for
-   which the entire 64k is not in use at all point at this
-   distinguished map.
-
-   [...] lots of stuff deleted due to out of date-ness
-
-   As a final optimisation, the alignment and address checks for
-   4-byte loads and stores are combined in a neat way.  The primary
-   map is extended to have 262144 entries (2^18), rather than 2^16.
-   The top 3/4 of these entries are permanently set to the
-   distinguished secondary map.  For a 4-byte load/store, the
-   top-level map is indexed not with (addr >> 16) but instead f(addr),
-   where
-
-    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
-        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
-        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
-
-   ie the lowest two bits are placed above the 16 high address bits.
-   If either of these two bits are nonzero, the address is misaligned;
-   this will select a secondary map from the upper 3/4 of the primary
-   map.  Because this is always the distinguished secondary map, a
-   (bogus) address check failure will result.  The failure handling
-   code can then figure out whether this is a genuine addr check
-   failure or whether it is a possibly-legitimate access at a
-   misaligned address.  
-*/
-
-
-/*------------------------------------------------------------*/
-/*--- Crude profiling machinery.                           ---*/
-/*------------------------------------------------------------*/
-
-#ifdef VG_PROFILE_MEMORY
-
-#define N_PROF_EVENTS 150
-
-static UInt event_ctr[N_PROF_EVENTS];
-
-static void init_prof_mem ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++)
-      event_ctr[i] = 0;
-}
-
-void VG_(done_prof_mem) ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++) {
-      if ((i % 10) == 0) 
-         VG_(printf)("\n");
-      if (event_ctr[i] > 0)
-         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+typedef
+   struct _ExeSeg {
+      Addr start;
+      UInt size;
+      struct _ExeSeg* next;
    }
-   VG_(printf)("\n");
-}
+   ExeSeg;
 
-#define PROF_EVENT(ev)                                  \
-   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
-        event_ctr[ev]++;                                \
-   } while (False);
+/* The list of current executable segments loaded.  Required so that when a
+   segment is munmap'd, if it's executable we can recognise it as such and
+   invalidate translations for it, and drop any basic-block specific
+   information being stored.  If symbols are being used, this list will have
+   the same segments recorded in it as the SegInfo symbols list (but much
+   less information about each segment).
+*/
+static ExeSeg* exeSegsHead = NULL;
 
-#else
-
-static void init_prof_mem ( void ) { }
-       void VG_(done_prof_mem) ( void ) { }
-
-#define PROF_EVENT(ev) /* */
-
-#endif
-
-/* Event index.  If just the name of the fn is given, this means the
-   number of calls to the fn.  Otherwise it is the specified event.
-
-   10   alloc_secondary_map
-
-   20   get_abit
-   21   get_vbyte
-   22   set_abit
-   23   set_vbyte
-   24   get_abits4_ALIGNED
-   25   get_vbytes4_ALIGNED
-
-   30   set_address_range_perms
-   31   set_address_range_perms(lower byte loop)
-   32   set_address_range_perms(quadword loop)
-   33   set_address_range_perms(upper byte loop)
+/* Prepend it -- mmaps/munmaps likely to follow a stack pattern(?) so this
+   is good.
+   Also check no segments overlap, which would be very bad.  Check is linear
+   for each seg added (quadratic overall) but the total number should be
+   small (konqueror has around 50 --njn). */
+static void add_exe_segment_to_list( a, len ) 
+{
+   Addr lo = a;
+   Addr hi = a + len - 1;
+   ExeSeg* es;
+   ExeSeg* es2;
    
-   35   make_noaccess
-   36   make_writable
-   37   make_readable
+   /* Prepend it */
+   es        = (ExeSeg*)VG_(arena_malloc)(VG_AR_CORE, sizeof(ExeSeg));
+   es->start = a;
+   es->size  = len;
+   es->next  = exeSegsHead;
+   exeSegsHead = es;
 
-   40   copy_address_range_perms
-   41   copy_address_range_perms(byte loop)
-   42   check_writable
-   43   check_writable(byte loop)
-   44   check_readable
-   45   check_readable(byte loop)
-   46   check_readable_asciiz
-   47   check_readable_asciiz(byte loop)
-
-   50   make_aligned_word_NOACCESS
-   51   make_aligned_word_WRITABLE
-
-   60   helperc_LOADV4
-   61   helperc_STOREV4
-   62   helperc_LOADV2
-   63   helperc_STOREV2
-   64   helperc_LOADV1
-   65   helperc_STOREV1
-
-   70   rim_rd_V4_SLOWLY
-   71   rim_wr_V4_SLOWLY
-   72   rim_rd_V2_SLOWLY
-   73   rim_wr_V2_SLOWLY
-   74   rim_rd_V1_SLOWLY
-   75   rim_wr_V1_SLOWLY
-
-   80   fpu_read
-   81   fpu_read aligned 4
-   82   fpu_read aligned 8
-   83   fpu_read 2
-   84   fpu_read 10
-
-   85   fpu_write
-   86   fpu_write aligned 4
-   87   fpu_write aligned 8
-   88   fpu_write 2
-   89   fpu_write 10
-
-   90   fpu_read_check_SLOWLY
-   91   fpu_read_check_SLOWLY(byte loop)
-   92   fpu_write_check_SLOWLY
-   93   fpu_write_check_SLOWLY(byte loop)
-
-   100  is_plausible_stack_addr
-   101  handle_esp_assignment
-   102  handle_esp_assignment(-4)
-   103  handle_esp_assignment(+4)
-   104  handle_esp_assignment(-12)
-   105  handle_esp_assignment(-8)
-   106  handle_esp_assignment(+16)
-   107  handle_esp_assignment(+12)
-   108  handle_esp_assignment(0)
-   109  handle_esp_assignment(+8)
-   110  handle_esp_assignment(-16)
-   111  handle_esp_assignment(+20)
-   112  handle_esp_assignment(-20)
-   113  handle_esp_assignment(+24)
-   114  handle_esp_assignment(-24)
-
-   120  vg_handle_esp_assignment_SLOWLY
-   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
-   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
-   123  vg_handle_esp_assignment_SLOWLY(normal)
-   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
-*/
-
-/*------------------------------------------------------------*/
-/*--- Function declarations.                               ---*/
-/*------------------------------------------------------------*/
-
-/* Set permissions for an address range.  Not speed-critical. */
-void VGM_(make_noaccess) ( Addr a, UInt len );
-void VGM_(make_writable) ( Addr a, UInt len );
-void VGM_(make_readable) ( Addr a, UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a );
-static UInt vgm_rd_V2_SLOWLY ( Addr a );
-static UInt vgm_rd_V1_SLOWLY ( Addr a );
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes );
-static void fpu_read_check_SLOWLY ( Addr addr, Int size );
-static void fpu_write_check_SLOWLY ( Addr addr, Int size );
-
-
-/*------------------------------------------------------------*/
-/*--- Data defns.                                          ---*/
-/*------------------------------------------------------------*/
-
-typedef 
-   struct {
-      UChar abits[8192];
-      UChar vbyte[65536];
-   }
-   SecMap;
-
-/* These two are statically allocated.  Should they be non-public? */
-SecMap* VG_(primary_map)[ /*65536*/ 262144 ];
-static SecMap  vg_distinguished_secondary_map;
-
-#define IS_DISTINGUISHED_SM(smap) \
-   ((smap) == &vg_distinguished_secondary_map)
-
-#define ENSURE_MAPPABLE(addr,caller)                                   \
-   do {                                                                \
-      if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) {       \
-         VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \
-         /* VG_(printf)("new 2map because of %p\n", addr);   */       \
-      }                                                                \
-   } while(0)
-
-#define BITARR_SET(aaa_p,iii_p)                         \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] |= (1 << (iii & 7));                \
-   } while (0)
-
-#define BITARR_CLEAR(aaa_p,iii_p)                       \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
-   } while (0)
-
-#define BITARR_TEST(aaa_p,iii_p)                        \
-      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
-               & (1 << (((UInt)iii_p) & 7))))           \
-
-
-#define VGM_BIT_VALID      0
-#define VGM_BIT_INVALID    1
-
-#define VGM_NIBBLE_VALID   0
-#define VGM_NIBBLE_INVALID 0xF
-
-#define VGM_BYTE_VALID     0
-#define VGM_BYTE_INVALID   0xFF
-
-/* Now in vg_include.h.
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-*/
-
-#define VGM_EFLAGS_VALID   0xFFFFFFFE
-#define VGM_EFLAGS_INVALID 0xFFFFFFFF
-
-
-#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
-
-
-/*------------------------------------------------------------*/
-/*--- Basic bitmap management, reading and writing.        ---*/
-/*------------------------------------------------------------*/
-
-/* Allocate and initialise a secondary map. */
-
-static SecMap* alloc_secondary_map ( __attribute__ ((unused)) 
-                                     Char* caller )
-{
-   SecMap* map;
-   UInt  i;
-   PROF_EVENT(10);
-
-   /* Mark all bytes as invalid access and invalid value. */
-
-   /* It just happens that a SecMap occupies exactly 18 pages --
-      although this isn't important, so the following assert is
-      spurious. */
-   vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
-
-   for (i = 0; i < 8192; i++)
-      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
-   return map;
-}
-
-
-/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
-
-static __inline__ UChar get_abit ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(20);
-   return BITARR_TEST(sm->abits, sm_off) 
-             ? VGM_BIT_INVALID : VGM_BIT_VALID;
-}
-
-static __inline__ UChar get_vbyte ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(21);
-   return sm->vbyte[sm_off];
-}
-
-static __inline__ void set_abit ( Addr a, UChar abit )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(22);
-   ENSURE_MAPPABLE(a, "set_abit");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   if (abit) 
-      BITARR_SET(sm->abits, sm_off);
-   else
-      BITARR_CLEAR(sm->abits, sm_off);
-}
-
-static __inline__ void set_vbyte ( Addr a, UChar vbyte )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(23);
-   ENSURE_MAPPABLE(a, "set_vbyte");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   sm->vbyte[sm_off] = vbyte;
-}
-
-
-/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
-
-static __inline__ UChar get_abits4_ALIGNED ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   abits8;
-   PROF_EVENT(24);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   abits8 = sm->abits[sm_off >> 3];
-   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   abits8 &= 0x0F;
-   return abits8;
-}
-
-static UInt __inline__ get_vbytes4_ALIGNED ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(25);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   return ((UInt*)(sm->vbyte))[sm_off >> 2];
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Setting permissions over address ranges.             ---*/
-/*------------------------------------------------------------*/
-
-static void set_address_range_perms ( Addr a, UInt len, 
-                                      UInt example_a_bit,
-                                      UInt example_v_bit )
-{
-   UChar   vbyte, abyte8;
-   UInt    vword4, sm_off;
-   SecMap* sm;
-
-   PROF_EVENT(30);
-
-   if (len == 0)
-      return;
-
-   if (len > 100 * 1000 * 1000) 
-      VG_(message)(Vg_UserMsg, 
-                   "Warning: set address range perms: "
-                   "large range %d, a %d, v %d",
-                   len, example_a_bit, example_v_bit );
-
-   VGP_PUSHCC(VgpSARP);
-
-   /* Requests to change permissions of huge address ranges may
-      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
-      far all legitimate requests have fallen beneath that size. */
-   /* 4 Mar 02: this is just stupid; get rid of it. */
-   /* vg_assert(len < 30000000); */
-
-   /* Check the permissions make sense. */
-   vg_assert(example_a_bit == VGM_BIT_VALID 
-             || example_a_bit == VGM_BIT_INVALID);
-   vg_assert(example_v_bit == VGM_BIT_VALID 
-             || example_v_bit == VGM_BIT_INVALID);
-   if (example_a_bit == VGM_BIT_INVALID)
-      vg_assert(example_v_bit == VGM_BIT_INVALID);
-
-   /* The validity bits to write. */
-   vbyte = example_v_bit==VGM_BIT_VALID 
-              ? VGM_BYTE_VALID : VGM_BYTE_INVALID;
-
-   /* In order that we can charge through the address space at 8
-      bytes/main-loop iteration, make up some perms. */
-   abyte8 = (example_a_bit << 7)
-            | (example_a_bit << 6)
-            | (example_a_bit << 5)
-            | (example_a_bit << 4)
-            | (example_a_bit << 3)
-            | (example_a_bit << 2)
-            | (example_a_bit << 1)
-            | (example_a_bit << 0);
-   vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte;
-
-#  ifdef VG_DEBUG_MEMORY
-   /* Do it ... */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }
-
-#  else
-   /* Slowly do parts preceding 8-byte alignment. */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      if ((a % 8) == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0);
-
-   /* Once aligned, go fast. */
-   while (True) {
-      PROF_EVENT(32);
-      if (len < 8) break;
-      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
-      sm = VG_(primary_map)[a >> 16];
-      sm_off = a & 0xFFFF;
-      sm->abits[sm_off >> 3] = abyte8;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4;
-      a += 8;
-      len -= 8;
-   }
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0 && len < 8);
-
-   /* Finish the upper fragment. */
-   while (True) {
-      PROF_EVENT(33);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-#  endif
-
-   /* Check that zero page and highest page have not been written to
-      -- this could happen with buggy syscall wrappers.  Today
-      (2001-04-26) had precisely such a problem with
-      __NR_setitimer. */
-   vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   VGP_POPCC;
-}
-
-
-/* Set permissions for address ranges ... */
-
-void VGM_(make_noaccess) ( Addr a, UInt len )
-{
-   PROF_EVENT(35);
-   set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_writable) ( Addr a, UInt len )
-{
-   PROF_EVENT(36);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_readable) ( Addr a, UInt len )
-{
-   PROF_EVENT(37);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-void VGM_(make_readwritable) ( Addr a, UInt len )
-{
-   PROF_EVENT(38);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-/* Block-copy permissions (needed for implementing realloc()). */
-
-void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len )
-{
-   UInt i;
-   PROF_EVENT(40);
-   for (i = 0; i < len; i++) {
-      UChar abit  = get_abit ( src+i );
-      UChar vbyte = get_vbyte ( src+i );
-      PROF_EVENT(41);
-      set_abit ( dst+i, abit );
-      set_vbyte ( dst+i, vbyte );
-   }
-}
-
-
-/* Check permissions for address range.  If inadequate permissions
-   exist, *bad_addr is set to the offending address, so the caller can
-   know what it is. */
-
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
-{
-   UInt  i;
-   UChar abit;
-   PROF_EVENT(42);
-   for (i = 0; i < len; i++) {
-      PROF_EVENT(43);
-      abit = get_abit(a);
-      if (abit == VGM_BIT_INVALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
+   /* Check there's no overlap with the rest of the list */
+   for (es2 = es->next; es2 != NULL; es2 = es2->next) {
+      Addr lo2 = es2->start;
+      Addr hi2 = es2->start + es2->size - 1;
+      Bool overlap;
+      vg_assert(lo < hi);
+      vg_assert(lo2 < hi2);
+      /* the main assertion */
+      overlap = (lo <= lo2 && lo2 <= hi)
+                 || (lo <= hi2 && hi2 <= hi);
+      if (overlap) {
+         VG_(printf)("\n\nOVERLAPPING EXE SEGMENTS\n"
+                     "  new: start %p, size %d\n"
+                     "  old: start %p, size %d\n\n",
+                     es->start, es->size, es2->start, es2->size );
+         vg_assert(! overlap);
       }
-      a++;
    }
+}
+
+static Bool remove_if_exe_segment_from_list( Addr a, UInt len )
+{
+   ExeSeg **prev_next_ptr = & exeSegsHead, 
+          *curr = exeSegsHead;
+
+   while (True) {
+      if (curr == NULL) break;
+      if (a == curr->start) break;
+      prev_next_ptr = &curr->next;
+      curr = curr->next;
+   }
+   if (curr == NULL)
+      return False;
+
+   vg_assert(*prev_next_ptr == curr);
+
+   *prev_next_ptr = curr->next;
+
+   VG_(arena_free)(VG_AR_CORE, curr);
    return True;
 }
 
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+/* Records the exe segment in the ExeSeg list (checking for overlaps), and
+   reads debug info if required.  Note the entire /proc/pid/maps file is 
+   read for the debug info, but it just reads symbols for newly added exe
+   segments.  This is required to find out their names if they have one.  So
+   we don't use this at startup because it's overkill and can screw reading
+   of /proc/pid/maps.
+ */
+void VG_(new_exe_segment) ( Addr a, UInt len )
 {
-   UInt  i;
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(44);
-   for (i = 0; i < len; i++) {
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      PROF_EVENT(45);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      a++;
-   }
-   return True;
+   // SSS: only bother if size != 0?  Does that happen? (probably can)
+
+   add_exe_segment_to_list( a, len );
+   VG_(maybe_read_symbols)();
 }
 
-
-/* Check a zero-terminated ascii string.  Tricky -- don't want to
-   examine the actual bytes, to find the end, until we're sure it is
-   safe to do so. */
-
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+/* Invalidate translations as necessary (also discarding any basic
+   block-specific info retained by the skin) and unload any debug
+   symbols. */
+// Nb: remove_if_exe_segment_from_list() and VG_(maybe_unload_symbols)()
+// both ignore 'len', but that seems that's ok for most programs...  see
+// comment above vg_syscalls.c:mmap_segment() et al for more details.
+void VG_(remove_if_exe_segment) ( Addr a, UInt len )
 {
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(46);
-   while (True) {
-      PROF_EVENT(47);
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      /* Ok, a is safe to read. */
-      if (* ((UChar*)a) == 0) return True;
-      a++;
+   if (remove_if_exe_segment_from_list( a, len )) {
+      VG_(invalidate_translations) ( a, len );
+      VG_(maybe_unload_symbols)    ( a, len );
    }
 }
 
 
-/* Setting permissions for aligned words.  This supports fast stack
-   operations. */
-
-static __inline__ void make_aligned_word_NOACCESS ( Addr a )
+static
+void startup_segment_callback ( Addr start, UInt size, 
+                                Char rr, Char ww, Char xx, 
+                                UInt foffset, UChar* filename )
 {
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(50);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (1s). */
-   sm->abits[sm_off >> 3] |= mask;
-}
+   UInt r_esp;
+   Bool is_stack_segment;
 
-static __inline__ void make_aligned_word_WRITABLE ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(51);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (0s). */
-   sm->abits[sm_off >> 3] &= ~mask;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Functions called directly from generated code.       ---*/
-/*------------------------------------------------------------*/
-
-static __inline__ UInt rotateRight16 ( UInt x )
-{
-   /* Amazingly, gcc turns this into a single rotate insn. */
-   return (x >> 16) | (x << 16);
-}
-
-
-static __inline__ UInt shiftRight16 ( UInt x )
-{
-   return x >> 16;
-}
-
-
-/* Read/write 1/2/4 sized V bytes, and emit an address error if
-   needed. */
-
-/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast.
-   Under all other circumstances, it defers to the relevant _SLOWLY
-   function, which can handle all situations.
-*/
-UInt VG_(helperc_LOADV4) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V4_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(60);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V4_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV4) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V4_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(61);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V4_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV2) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V2_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(62);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFF0000 
-             |  
-             (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V2_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV2) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V2_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(63);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V2_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV1) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V1_SLOWLY(a);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(64);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFFFF00
-             |
-             (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V1_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV1) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V1_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(65);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V1_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Fallback functions to handle cases that the above    ---*/
-/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage.           ---*/
-/*------------------------------------------------------------*/
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a )
-{
-   Bool a0ok, a1ok, a2ok, a3ok;
-   UInt vb0, vb1, vb2, vb3;
-
-   PROF_EVENT(70);
-
-   /* First establish independently the addressibility of the 4 bytes
-      involved. */
-   a0ok = get_abit(a+0) == VGM_BIT_VALID;
-   a1ok = get_abit(a+1) == VGM_BIT_VALID;
-   a2ok = get_abit(a+2) == VGM_BIT_VALID;
-   a3ok = get_abit(a+3) == VGM_BIT_VALID;
-
-   /* Also get the validity bytes for the address. */
-   vb0 = (UInt)get_vbyte(a+0);
-   vb1 = (UInt)get_vbyte(a+1);
-   vb2 = (UInt)get_vbyte(a+2);
-   vb3 = (UInt)get_vbyte(a+3);
-
-   /* Now distinguish 3 cases */
-
-   /* Case 1: the address is completely valid, so:
-      - no addressing error
-      - return V bytes as read from memory
-   */
-   if (a0ok && a1ok && a2ok && a3ok) {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= vb3;
-      vw <<= 8; vw |= vb2;
-      vw <<= 8; vw |= vb1;
-      vw <<= 8; vw |= vb0;
-      return vw;
-   }
-
-   /* Case 2: the address is completely invalid.  
-      - emit addressing error
-      - return V word indicating validity.  
-      This sounds strange, but if we make loads from invalid addresses 
-      give invalid data, we also risk producing a number of confusing
-      undefined-value errors later, which confuses the fact that the
-      error arose in the first place from an invalid address. 
-   */
-   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
-   if (!VG_(clo_partial_loads_ok) 
-       || ((a & 3) != 0)
-       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
-      VG_(record_address_error)( a, 4, False );
-      return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) 
-             | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID;
-   }
-
-   /* Case 3: the address is partially valid.  
-      - no addressing error
-      - returned V word is invalid where the address is invalid, 
-        and contains V bytes from memory otherwise. 
-      Case 3 is only allowed if VG_(clo_partial_loads_ok) is True
-      (which is the default), and the address is 4-aligned.  
-      If not, Case 2 will have applied.
-   */
-   vg_assert(VG_(clo_partial_loads_ok));
-   {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID);
-      return vw;
-   }
-}
-
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(71);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+2) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+3) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+3, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 4, True );
-}
-
-static UInt vgm_rd_V2_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(72);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V bytes, remembering to do it little-endian-ly. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+1);
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 2, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(73);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 2, True );
-}
-
-static UInt vgm_rd_V1_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(74);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V byte. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 1, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(75);
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 1, True );
-}
-
-
-/* ---------------------------------------------------------------------
-   Called from generated code, or from the assembly helpers.
-   Handlers for value check failures.
-   ------------------------------------------------------------------ */
-
-void VG_(helperc_value_check0_fail) ( void )
-{
-   VG_(record_value_error) ( 0 );
-}
-
-void VG_(helperc_value_check1_fail) ( void )
-{
-   VG_(record_value_error) ( 1 );
-}
-
-void VG_(helperc_value_check2_fail) ( void )
-{
-   VG_(record_value_error) ( 2 );
-}
-
-void VG_(helperc_value_check4_fail) ( void )
-{
-   VG_(record_value_error) ( 4 );
-}
-
-
-/* ---------------------------------------------------------------------
-   FPU load and store checks, called from generated code.
-   ------------------------------------------------------------------ */
-
-void VGM_(fpu_read_check) ( Addr addr, Int size )
-{
-   /* Ensure the read area is both addressible and valid (ie,
-      readable).  If there's an address error, don't report a value
-      error too; but if there isn't an address error, check for a
-      value error. 
-
-      Try to be reasonably fast on the common case; wimp out and defer
-      to fpu_read_check_SLOWLY for everything else.  */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(80);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_read_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(81);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow4;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow4:
-      fpu_read_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(82);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Both halves properly aligned, addressible and with valid
-         data. */
-      return;
-     slow8:
-      fpu_read_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(83);
-      fpu_read_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(84);
-      fpu_read_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(84); /* XXX assign correct event number */
-      fpu_read_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_read_check: unhandled size");
-#  endif
-}
-
-
-void VGM_(fpu_write_check) ( Addr addr, Int size )
-{
-   /* Ensure the written area is addressible, and moan if otherwise.
-      If it is addressible, make it valid, otherwise invalid. 
-   */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(85);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_write_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(86);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      return;
-     slow4:
-      fpu_write_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(87);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow8:
-      fpu_write_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(88);
-      fpu_write_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(89);
-      fpu_write_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(89); /* XXX assign correct event number */
-      fpu_write_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_write_check: unhandled size");
-#  endif
-}
-
-
-/* ---------------------------------------------------------------------
-   Slow, general cases for FPU load and store checks.
-   ------------------------------------------------------------------ */
-
-/* Generic version.  Test for both addr and value errors, but if
-   there's an addr error, don't report a value error even if it
-   exists. */
-
-void fpu_read_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Bool aerr = False;
-   Bool verr = False;
-   PROF_EVENT(90);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(91);
-      if (get_abit(addr+i) != VGM_BIT_VALID)
-         aerr = True;
-      if (get_vbyte(addr+i) != VGM_BYTE_VALID)
-         verr = True;
-   }
-
-   if (aerr) {
-      VG_(record_address_error)( addr, size, False );
-   } else {
-     if (verr)
-        VG_(record_value_error)( size );
-   }
-}
-
-
-/* Generic version.  Test for addr errors.  Valid addresses are
-   given valid values, and invalid addresses invalid values. */
-
-void fpu_write_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Addr a_here;
-   Bool a_ok;
-   Bool aerr = False;
-   PROF_EVENT(92);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(93);
-      a_here = addr+i;
-      a_ok = get_abit(a_here) == VGM_BIT_VALID;
-      if (a_ok) {
-	set_vbyte(a_here, VGM_BYTE_VALID);
-      } else {
-	set_vbyte(a_here, VGM_BYTE_INVALID);
-        aerr = True;
+   /* Sanity check ... if this is the executable's text segment,
+      ensure it is loaded where we think it ought to be.  Any file
+      name which doesn't contain ".so" is assumed to be the
+      executable. */
+   if (filename != NULL
+       && xx == 'x'
+       && VG_(strstr(filename, ".so")) == NULL
+      ) {
+      /* We assume this is the executable. */
+      if (start != VG_ASSUMED_EXE_BASE) {
+         VG_(message)(Vg_UserMsg,
+                      "FATAL: executable base addr not as assumed.");
+         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
+                      filename, start, VG_ASSUMED_EXE_BASE);
+         VG_(message)(Vg_UserMsg,
+            "One reason this could happen is that you have a shared object");
+         VG_(message)(Vg_UserMsg,
+            " whose name doesn't contain the characters \".so\", so Valgrind ");
+         VG_(message)(Vg_UserMsg,
+            "naively assumes it is the executable.  ");
+         VG_(message)(Vg_UserMsg,
+            "In that case, rename it appropriately.");
+         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
       }
    }
-   if (aerr) {
-      VG_(record_address_error)( addr, size, True );
+
+   if (0)
+      VG_(message)(Vg_DebugMsg,
+                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
+                   start,start+size,rr,ww,xx,foffset,
+                   size, filename?filename:(UChar*)"NULL");
+
+   if (rr != 'r' && xx != 'x' && ww != 'w') {
+      VG_(printf)("No permissions on the segment named %s\n", filename);
+      VG_(panic)("Non-readable, writable, executable segment at startup");
    }
+
+   /* This parallels what happens when we mmap some new memory */
+   if (filename != NULL && xx == 'x') {
+      VG_(new_exe_segment)( start, size );
+   }
+   VG_TRACK( new_mem_startup, start, size, rr=='r', ww=='w', xx=='x' );
+
+   /* If this is the stack segment mark all below %esp as noaccess. */
+   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   is_stack_segment = start <= r_esp && r_esp < start+size;
+   if (is_stack_segment) {
+      if (0)
+         VG_(message)(Vg_DebugMsg, "invalidating stack area: %x .. %x",
+                      start,r_esp);
+      VG_TRACK( die_mem_stack, start, r_esp-start );
+   }
+}
+
+
+/* 1. Records exe segments from /proc/pid/maps -- always necessary, because 
+      if they're munmap()ed we need to know if they were executable in order
+      to discard translations.  Also checks there's no exe segment overlaps.
+
+   2. Marks global variables that might be accessed from generated code;
+
+   3. Sets up the end of the data segment so that vg_syscalls.c can make
+      sense of calls to brk().
+ */
+void VG_(init_memory) ( void )
+{
+   /* 1 and 2 */
+   VG_(read_procselfmaps) ( startup_segment_callback );
+
+   /* 3 */
+   VG_TRACK( post_mem_write, (Addr) & VG_(running_on_simd_CPU), 1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_trace_malloc),    1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_sloppy_malloc),   1 );
+
+   /* 4 */
+   VG_(init_dataseg_end_for_brk)();
 }
 
 
@@ -1340,7 +273,7 @@
 Bool is_plausible_stack_addr ( ThreadState* tst, Addr aa )
 {
    UInt a = (UInt)aa;
-   PROF_EVENT(100);
+   //PROF_EVENT(100);   PPP
    if (a <= tst->stack_highest_word && 
        a > tst->stack_highest_word - VG_PLAUSIBLE_STACK_SIZE)
       return True;
@@ -1349,18 +282,6 @@
 }
 
 
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
-{
-   if ((UInt)esp > (UInt)aa
-       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
-      return True;
-   else
-      return False;
-}
-
-
 /* Kludgey ... how much does %esp have to change before we reckon that
    the application is switching stacks ? */
 #define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4)
@@ -1370,133 +291,59 @@
    return a & ~(VKI_BYTES_PER_PAGE-1);
 }
 
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp );
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr );
-
-void VGM_(handle_esp_assignment) ( Addr new_espA )
+__attribute__ ((regparm (1)))
+void VG_(handle_esp_assignment) ( Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
+   UInt old_esp;
+   Int  delta;
 
-   PROF_EVENT(101);
+   VGP_MAYBE_PUSHCC(VgpStack);
+
+   old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   delta = ((Int)new_esp) - ((Int)old_esp);
+
+   /* Update R_ESP */
+   VG_(baseBlock)[VGOFF_(m_esp)] = new_esp;
+
+   //PROF_EVENT(101);   PPP
 
 #  ifndef VG_DEBUG_MEMORY
 
-   if (IS_ALIGNED4_ADDR(old_esp)) {
+   if (IS_ALIGNED4_ADDR(old_esp) &&  IS_ALIGNED4_ADDR(new_esp)) {
 
       /* Deal with the most common cases fast.  These are ordered in
          the sequence most common first. */
 
-      if (delta == -4) {
-         /* Moving down by 4 and properly aligned.. */
-         PROF_EVENT(102);
-         make_aligned_word_WRITABLE(new_esp);
-         return;
+#     ifdef VG_PROFILE_MEMORY
+      // PPP
+      if      (delta = - 4) PROF_EVENT(102);
+      else if (delta =   4) PROF_EVENT(103);
+      else if (delta = -12) PROF_EVENT(104);
+      else if (delta = - 8) PROF_EVENT(105);
+      else if (delta =  16) PROF_EVENT(106);
+      else if (delta =  12) PROF_EVENT(107);
+      else if (delta =   0) PROF_EVENT(108);
+      else if (delta =   8) PROF_EVENT(109);
+      else if (delta = -16) PROF_EVENT(110);
+      else if (delta =  20) PROF_EVENT(111);
+      else if (delta = -20) PROF_EVENT(112);
+      else if (delta =  24) PROF_EVENT(113);
+      else if (delta = -24) PROF_EVENT(114);
+      else if (delta > 0)   PROF_EVENT(115); // PPP: new: aligned_big_pos
+      else                  PROF_EVENT(116); // PPP: new: aligned_big_neg
+#     endif
+      
+      if (delta < 0) {
+         VG_TRACK(new_mem_stack_aligned, new_esp, -delta);
+      } else if (delta > 0) {
+         VG_TRACK(die_mem_stack_aligned, old_esp, delta);
       }
+      /* Do nothing if (delta==0) */
 
-      if (delta == 4) {
-         /* Moving up by 4 and properly aligned. */
-         PROF_EVENT(103);
-         make_aligned_word_NOACCESS(old_esp);
-         return;
-      }
-
-      if (delta == -12) {
-         PROF_EVENT(104);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         return;
-      }
-
-      if (delta == -8) {
-         PROF_EVENT(105);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         return;
-      }
-
-      if (delta == 16) {
-         PROF_EVENT(106);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         return;
-      }
-
-      if (delta == 12) {
-         PROF_EVENT(107);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         return;
-      }
-
-      if (delta == 0) {
-         PROF_EVENT(108);
-         return;
-      }
-
-      if (delta == 8) {
-         PROF_EVENT(109);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         return;
-      }
-
-      if (delta == -16) {
-         PROF_EVENT(110);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         return;
-      }
-
-      if (delta == 20) {
-         PROF_EVENT(111);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         return;
-      }
-
-      if (delta == -20) {
-         PROF_EVENT(112);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         return;
-      }
-
-      if (delta == 24) {
-         PROF_EVENT(113);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         make_aligned_word_NOACCESS(old_esp+20);
-         return;
-      }
-
-      if (delta == -24) {
-         PROF_EVENT(114);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         make_aligned_word_WRITABLE(new_esp+20);
-         return;
-      }
-
+      VGP_MAYBE_POPCC(VgpStack);
+      return;
    }
 
 #  endif
@@ -1504,33 +351,35 @@
    /* The above special cases handle 90% to 95% of all the stack
       adjustments.  The rest we give to the slow-but-general
       mechanism. */
-   vg_handle_esp_assignment_SLOWLY ( new_espA );
+   vg_handle_esp_assignment_SLOWLY ( old_esp, new_esp );
+   VGP_MAYBE_POPCC(VgpStack);
 }
 
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA )
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
-   //   VG_(printf)("%d ", delta);
-   PROF_EVENT(120);
+   Int  delta;
+   
+   delta = ((Int)new_esp) - ((Int)old_esp);
+   //VG_(printf)("delta %d (%x) %x --> %x\n", delta, delta, old_esp, new_esp);
+   //PROF_EVENT(120);   PPP
    if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) {
       /* "Ordinary" stack change. */
       if (new_esp < old_esp) {
          /* Moving down; the stack is growing. */
-         PROF_EVENT(121);
-         VGM_(make_writable) ( new_esp, old_esp - new_esp );
-         return;
-      }
-      if (new_esp > old_esp) {
+         //PROF_EVENT(121); PPP
+         VG_TRACK( new_mem_stack, new_esp, -delta );
+      
+      } else if (new_esp > old_esp) {
          /* Moving up; the stack is shrinking. */
-         PROF_EVENT(122);
-         VGM_(make_noaccess) ( old_esp, new_esp - old_esp );
-         return;
+         //PROF_EVENT(122); PPP
+         VG_TRACK( die_mem_stack, old_esp, delta );
+
+      } else {
+         /* when old_esp == new_esp */
+         //PROF_EVENT(123);    PPP
       }
-      PROF_EVENT(123);
-      return; /* when old_esp == new_esp */
+      return;
    }
 
    /* %esp has changed by more than HUGE_DELTA.  We take this to mean
@@ -1552,863 +401,21 @@
      Addr valid_up_to     = get_page_base(new_esp) + VKI_BYTES_PER_PAGE
                             + 0 * VKI_BYTES_PER_PAGE;
      ThreadState* tst     = VG_(get_current_thread_state)();
-     PROF_EVENT(124);
+     //PROF_EVENT(124); PPP
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_UserMsg, "Warning: client switching stacks?  "
-                                 "%%esp: %p --> %p",
-                                  old_esp, new_esp);
+                                 "%%esp: %p --> %p", old_esp, new_esp);
      /* VG_(printf)("na %p,   %%esp %p,   wr %p\n",
                     invalid_down_to, new_esp, valid_up_to ); */
-     VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to );
+     VG_TRACK( die_mem_stack, invalid_down_to, new_esp - invalid_down_to );
      if (!is_plausible_stack_addr(tst, new_esp)) {
-        VGM_(make_readable) ( new_esp, valid_up_to - new_esp );
+        VG_TRACK( post_mem_write, new_esp, valid_up_to - new_esp );
      }
    }
 }
 
 
-/*--------------------------------------------------------------*/
-/*--- Initialise the memory audit system on program startup. ---*/
-/*--------------------------------------------------------------*/
-
-/* Handle one entry derived from /proc/self/maps. */
-
-static
-void init_memory_audit_callback ( 
-        Addr start, UInt size, 
-        Char rr, Char ww, Char xx, 
-        UInt foffset, UChar* filename )
-{
-   UChar example_a_bit;
-   UChar example_v_bit;
-   UInt  r_esp;
-   Bool  is_stack_segment;
-
-   /* Sanity check ... if this is the executable's text segment,
-      ensure it is loaded where we think it ought to be.  Any file
-      name which doesn't contain ".so" is assumed to be the
-      executable. */
-   if (filename != NULL
-       && xx == 'x'
-       && VG_(strstr(filename, ".so")) == NULL
-      ) {
-      /* We assume this is the executable. */
-      if (start != VG_ASSUMED_EXE_BASE) {
-         VG_(message)(Vg_UserMsg,
-                      "FATAL: executable base addr not as assumed.");
-         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
-                      filename, start, VG_ASSUMED_EXE_BASE);
-         VG_(message)(Vg_UserMsg,
-            "One reason this could happen is that you have a shared object");
-         VG_(message)(Vg_UserMsg,
-            " whose name doesn't contain the characters \".so\", so Valgrind ");
-         VG_(message)(Vg_UserMsg,
-            "naively assumes it is the executable.  ");
-         VG_(message)(Vg_UserMsg,
-            "In that case, rename it appropriately.");
-         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
-      }
-   }
-    
-   if (0)
-      VG_(message)(Vg_DebugMsg, 
-                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
-                   start,start+size,rr,ww,xx,foffset,
-                   size, filename?filename:(UChar*)"NULL");
-
-   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   is_stack_segment = start <= r_esp && r_esp < start+size;
-
-   /* Figure out the segment's permissions.
-
-      All segments are addressible -- since a process can read its
-      own text segment.
-
-      A read-but-not-write segment presumably contains initialised
-      data, so is all valid.  Read-write segments presumably contains
-      uninitialised data, so is all invalid.  */
-
-   /* ToDo: make this less bogus. */
-   if (rr != 'r' && xx != 'x' && ww != 'w') {
-      /* Very bogus; this path never gets taken. */
-      /* A no, V no */
-      example_a_bit = VGM_BIT_INVALID;
-      example_v_bit = VGM_BIT_INVALID;
-   } else {
-      /* A yes, V yes */
-      example_a_bit = VGM_BIT_VALID;
-      example_v_bit = VGM_BIT_VALID;
-      /* Causes a lot of errs for unknown reasons. 
-         if (filename is valgrind.so 
-               [careful about end conditions on filename]) {
-            example_a_bit = VGM_BIT_INVALID;
-            example_v_bit = VGM_BIT_INVALID;
-         }
-      */
-   }
-
-   set_address_range_perms ( start, size, 
-                             example_a_bit, example_v_bit );
-
-   if (is_stack_segment) {
-      /* This is the stack segment.  Mark all below %esp as
-         noaccess. */
-      if (0)
-         VG_(message)(Vg_DebugMsg, 
-                      "invalidating stack area: %x .. %x",
-                      start,r_esp);
-      VGM_(make_noaccess)( start, r_esp-start );
-   }
-}
-
-
-/* Initialise the memory audit system. */
-void VGM_(init_memory_audit) ( void )
-{
-   Int i;
-
-   init_prof_mem();
-
-   for (i = 0; i < 8192; i++)
-      vg_distinguished_secondary_map.abits[i] 
-         = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      vg_distinguished_secondary_map.vbyte[i] 
-         = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* These entries gradually get overwritten as the used address
-      space expands. */
-   for (i = 0; i < 65536; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-   /* These ones should never change; it's a bug in Valgrind if they
-      do. */
-   for (i = 65536; i < 262144; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-
-   /* Read the initial memory mapping from the /proc filesystem, and
-      set up our own maps accordingly. */
-   VG_(read_procselfmaps) ( init_memory_audit_callback );
-
-   /* Last but not least, set up the shadow regs with reasonable (sic)
-      values.  All regs are claimed to have valid values.
-   */
-   VG_(baseBlock)[VGOFF_(sh_esp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eax)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ecx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_esi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID;
-
-   /* Record the end of the data segment, so that vg_syscall_mem.c
-      can make sense of calls to brk(). 
-   */
-   VGM_(curr_dataseg_end) = (Addr)VG_(brk)(0);
-   if (VGM_(curr_dataseg_end) == (Addr)(-1))
-      VG_(panic)("vgm_init_memory_audit: can't determine data-seg end");
-
-   if (0)
-      VG_(printf)("DS END is %p\n", (void*)VGM_(curr_dataseg_end));
-
-   /* Read the list of errors to suppress.  This should be found in
-      the file specified by vg_clo_suppressions. */
-   VG_(load_suppressions)();
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level address-space scanning, for the leak       ---*/
-/*--- detector.                                            ---*/
-/*------------------------------------------------------------*/
-
-static 
-jmp_buf memscan_jmpbuf;
-
-static
-void vg_scan_all_valid_memory_sighandler ( Int sigNo )
-{
-   __builtin_longjmp(memscan_jmpbuf, 1);
-}
-
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
-{
-   /* All volatile, because some gccs seem paranoid about longjmp(). */
-   volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified;
-   volatile Addr pageBase, addr;
-   volatile SecMap* sm;
-   volatile UChar abits;
-   volatile UInt page_first_word;
-
-   vki_ksigaction sigbus_saved;
-   vki_ksigaction sigbus_new;
-   vki_ksigaction sigsegv_saved;
-   vki_ksigaction sigsegv_new;
-   vki_ksigset_t  blockmask_saved;
-   vki_ksigset_t  unblockmask_new;
-
-   /* Temporarily install a new sigsegv and sigbus handler, and make
-      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
-      first two can never be blocked anyway?)  */
-
-   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigbus_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
-   vg_assert(res == 0);
-
-   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigsegv_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
-   vg_assert(res == 0+0);
-
-   res =  VG_(ksigemptyset)( &unblockmask_new );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
-   vg_assert(res == 0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
-   vg_assert(res == 0+0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
-   vg_assert(res == 0+0+0+0+0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
-   vg_assert(res == 0+0+0+0+0+0);
-
-   /* The signal handlers are installed.  Actually do the memory scan. */
-   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
-   vg_assert(numPages == 1048576);
-   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
-
-   nWordsNotified = 0;
-
-   for (page = 0; page < numPages; page++) {
-      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
-      primaryMapNo = pageBase >> 16;
-      sm = VG_(primary_map)[primaryMapNo];
-      if (IS_DISTINGUISHED_SM(sm)) continue;
-      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
-         /* try this ... */
-         page_first_word = * (volatile UInt*)pageBase;
-         /* we get here if we didn't get a fault */
-         /* Scan the page */
-         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
-            abits  = get_abits4_ALIGNED(addr);
-            vbytes = get_vbytes4_ALIGNED(addr);
-            if (abits == VGM_NIBBLE_VALID 
-                && vbytes == VGM_WORD_VALID) {
-               nWordsNotified++;
-               notify_word ( addr, *(UInt*)addr );
-	    }
-         }
-      } else {
-         /* We get here if reading the first word of the page caused a
-            fault, which in turn caused the signal handler to longjmp.
-            Ignore this page. */
-         if (0)
-         VG_(printf)(
-            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
-            (void*)pageBase 
-         );
-      }
-   }
-
-   /* Restore signal state to whatever it was before. */
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
-   vg_assert(res == 0 +0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
-   vg_assert(res == 0 +0 +0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
-   vg_assert(res == 0 +0 +0 +0);
-
-   return nWordsNotified;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
-/*------------------------------------------------------------*/
-
-/* A block is either 
-   -- Proper-ly reached; a pointer to its start has been found
-   -- Interior-ly reached; only an interior pointer to it has been found
-   -- Unreached; so far, no pointers to any part of it have been found. 
-*/
-typedef 
-   enum { Unreached, Interior, Proper } 
-   Reachedness;
-
-/* A block record, used for generating err msgs. */
-typedef
-   struct _LossRecord {
-      struct _LossRecord* next;
-      /* Where these lost blocks were allocated. */
-      ExeContext*  allocated_at;
-      /* Their reachability. */
-      Reachedness  loss_mode;
-      /* Number of blocks and total # bytes involved. */
-      UInt         total_bytes;
-      UInt         num_blocks;
-   }
-   LossRecord;
-
-
-/* Find the i such that ptr points at or inside the block described by
-   shadows[i].  Return -1 if none found.  This assumes that shadows[]
-   has been sorted on the ->data field. */
-
-#ifdef VG_DEBUG_LEAKCHECK
-/* Used to sanity-check the fast binary-search mechanism. */
-static Int find_shadow_for_OLD ( Addr          ptr, 
-                                 ShadowChunk** shadows,
-                                 Int           n_shadows )
-
-{
-   Int  i;
-   Addr a_lo, a_hi;
-   PROF_EVENT(70);
-   for (i = 0; i < n_shadows; i++) {
-      PROF_EVENT(71);
-      a_lo = shadows[i]->data;
-      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
-      if (a_lo <= ptr && ptr <= a_hi)
-         return i;
-   }
-   return -1;
-}
-#endif
-
-
-static Int find_shadow_for ( Addr          ptr, 
-                             ShadowChunk** shadows,
-                             Int           n_shadows )
-{
-   Addr a_mid_lo, a_mid_hi;
-   Int lo, mid, hi, retVal;
-   PROF_EVENT(70);
-   /* VG_(printf)("find shadow for %p = ", ptr); */
-   retVal = -1;
-   lo = 0;
-   hi = n_shadows-1;
-   while (True) {
-      PROF_EVENT(71);
-
-      /* invariant: current unsearched space is from lo to hi,
-         inclusive. */
-      if (lo > hi) break; /* not found */
-
-      mid      = (lo + hi) / 2;
-      a_mid_lo = shadows[mid]->data;
-      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
-
-      if (ptr < a_mid_lo) {
-         hi = mid-1;
-         continue;
-      } 
-      if (ptr > a_mid_hi) {
-         lo = mid+1;
-         continue;
-      }
-      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
-      retVal = mid;
-      break;
-   }
-
-#  ifdef VG_DEBUG_LEAKCHECK
-   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
-#  endif
-   /* VG_(printf)("%d\n", retVal); */
-   return retVal;
-}
-
-
-
-static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
-{
-   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
-                      9841, 29524, 88573, 265720,
-                      797161, 2391484 };
-   Int          lo = 0;
-   Int          hi = n_shadows-1;
-   Int          i, j, h, bigN, hp;
-   ShadowChunk* v;
-
-   PROF_EVENT(72);
-   bigN = hi - lo + 1; if (bigN < 2) return;
-   hp = 0; while (incs[hp] < bigN) hp++; hp--;
-
-   for (; hp >= 0; hp--) {
-      PROF_EVENT(73);
-      h = incs[hp];
-      i = lo + h;
-      while (1) {
-         PROF_EVENT(74);
-         if (i > hi) break;
-         v = shadows[i];
-         j = i;
-         while (shadows[j-h]->data > v->data) {
-            PROF_EVENT(75);
-            shadows[j] = shadows[j-h];
-            j = j - h;
-            if (j <= (lo + h - 1)) break;
-         }
-         shadows[j] = v;
-         i++;
-      }
-   }
-}
-
-/* Globals, for the callback used by VG_(detect_memory_leaks). */
-
-static ShadowChunk** vglc_shadows;
-static Int           vglc_n_shadows;
-static Reachedness*  vglc_reachedness;
-static Addr          vglc_min_mallocd_addr;
-static Addr          vglc_max_mallocd_addr;
-
-static 
-void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
-{
-   Int  sh_no;
-   Addr ptr;
-
-   /* Rule out some known causes of bogus pointers.  Mostly these do
-      not cause much trouble because only a few false pointers can
-      ever lurk in these places.  This mainly stops it reporting that
-      blocks are still reachable in stupid test programs like this
-
-         int main (void) { char* a = malloc(100); return 0; }
-
-      which people seem inordinately fond of writing, for some reason.  
-
-      Note that this is a complete kludge.  It would be better to
-      ignore any addresses corresponding to valgrind.so's .bss and
-      .data segments, but I cannot think of a reliable way to identify
-      where the .bss segment has been put.  If you can, drop me a
-      line.  
-   */
-   if (a >= ((Addr)(&VG_(stack)))
-       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack))) {
-      return;
-   }
-   if (a >= ((Addr)(&VG_(m_state_static)))
-       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static))) {
-      return;
-   }
-   if (a == (Addr)(&vglc_min_mallocd_addr))
-      return;
-   if (a == (Addr)(&vglc_max_mallocd_addr))
-      return;
-
-   /* OK, let's get on and do something Useful for a change. */
-
-   ptr = (Addr)word_at_a;
-   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
-      /* Might be legitimate; we'll have to investigate further. */
-      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
-      if (sh_no != -1) {
-         /* Found a block at/into which ptr points. */
-         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
-         vg_assert(ptr < vglc_shadows[sh_no]->data 
-                         + vglc_shadows[sh_no]->size);
-         /* Decide whether Proper-ly or Interior-ly reached. */
-         if (ptr == vglc_shadows[sh_no]->data) {
-            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
-            vglc_reachedness[sh_no] = Proper;
-         } else {
-            if (vglc_reachedness[sh_no] == Unreached)
-               vglc_reachedness[sh_no] = Interior;
-         }
-      }
-   }
-}
-
-
-void VG_(detect_memory_leaks) ( void )
-{
-   Int    i;
-   Int    blocks_leaked, bytes_leaked;
-   Int    blocks_dubious, bytes_dubious;
-   Int    blocks_reachable, bytes_reachable;
-   Int    n_lossrecords;
-   UInt   bytes_notified;
-   
-   LossRecord*  errlist;
-   LossRecord*  p;
-
-   Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* );
-   PROF_EVENT(76);
-   vg_assert(VG_(clo_instrument));
-
-   /* Decide how closely we want to match ExeContexts in leak
-      records. */
-   switch (VG_(clo_leak_resolution)) {
-      case 2: 
-         ec_comparer_fn = VG_(eq_ExeContext_top2); 
-         break;
-      case 4: 
-         ec_comparer_fn = VG_(eq_ExeContext_top4); 
-         break;
-      case VG_DEEPEST_BACKTRACE: 
-         ec_comparer_fn = VG_(eq_ExeContext_all); 
-         break;
-      default: 
-         VG_(panic)("VG_(detect_memory_leaks): "
-                    "bad VG_(clo_leak_resolution)");
-         break;
-   }
-
-   /* vg_get_malloc_shadows allocates storage for shadows */
-   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
-   if (vglc_n_shadows == 0) {
-      vg_assert(vglc_shadows == NULL);
-      VG_(message)(Vg_UserMsg, 
-                   "No malloc'd blocks -- no leaks are possible.\n");
-      return;
-   }
-
-   VG_(message)(Vg_UserMsg, 
-                "searching for pointers to %d not-freed blocks.", 
-                vglc_n_shadows );
-   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
-
-   /* Sanity check; assert that the blocks are now in order and that
-      they don't overlap. */
-   for (i = 0; i < vglc_n_shadows-1; i++) {
-      vg_assert( ((Addr)vglc_shadows[i]->data)
-                 < ((Addr)vglc_shadows[i+1]->data) );
-      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
-                 < ((Addr)vglc_shadows[i+1]->data) );
-   }
-
-   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
-   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
-                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
-
-   vglc_reachedness 
-      = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) );
-   for (i = 0; i < vglc_n_shadows; i++)
-      vglc_reachedness[i] = Unreached;
-
-   /* Do the scan of memory. */
-   bytes_notified
-       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
-         * VKI_BYTES_PER_WORD;
-
-   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
-
-   blocks_leaked    = bytes_leaked    = 0;
-   blocks_dubious   = bytes_dubious   = 0;
-   blocks_reachable = bytes_reachable = 0;
-
-   for (i = 0; i < vglc_n_shadows; i++) {
-      if (vglc_reachedness[i] == Unreached) {
-         blocks_leaked++;
-         bytes_leaked += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Interior) {
-         blocks_dubious++;
-         bytes_dubious += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Proper) {
-         blocks_reachable++;
-         bytes_reachable += vglc_shadows[i]->size;
-      }
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-
-
-   /* Common up the lost blocks so we can print sensible error
-      messages. */
-
-   n_lossrecords = 0;
-   errlist       = NULL;
-   for (i = 0; i < vglc_n_shadows; i++) {
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->loss_mode == vglc_reachedness[i]
-             && ec_comparer_fn (
-                   p->allocated_at, 
-                   vglc_shadows[i]->where) ) {
-            break;
-	 }
-      }
-      if (p != NULL) {
-         p->num_blocks  ++;
-         p->total_bytes += vglc_shadows[i]->size;
-      } else {
-         n_lossrecords ++;
-         p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord));
-         p->loss_mode    = vglc_reachedness[i];
-         p->allocated_at = vglc_shadows[i]->where;
-         p->total_bytes  = vglc_shadows[i]->size;
-         p->num_blocks   = 1;
-         p->next         = errlist;
-         errlist         = p;
-      }
-   }
-   
-   for (i = 0; i < n_lossrecords; i++) {
-      LossRecord* p_min = NULL;
-      UInt        n_min = 0xFFFFFFFF;
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->num_blocks > 0 && p->total_bytes < n_min) {
-            n_min = p->total_bytes;
-            p_min = p;
-         }
-      }
-      vg_assert(p_min != NULL);
-
-      if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) {
-         p_min->num_blocks = 0;
-         continue;
-      }
-
-      VG_(message)(Vg_UserMsg, "");
-      VG_(message)(
-         Vg_UserMsg,
-         "%d bytes in %d blocks are %s in loss record %d of %d",
-         p_min->total_bytes, p_min->num_blocks,
-         p_min->loss_mode==Unreached ? "definitely lost" :
-            (p_min->loss_mode==Interior ? "possibly lost"
-                                        : "still reachable"),
-         i+1, n_lossrecords
-      );
-      VG_(pp_ExeContext)(p_min->allocated_at);
-      p_min->num_blocks = 0;
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
-   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-   if (!VG_(clo_show_reachable)) {
-      VG_(message)(Vg_UserMsg, 
-         "Reachable blocks (those to which a pointer was found) are not shown.");
-      VG_(message)(Vg_UserMsg, 
-         "To see them, rerun with: --show-reachable=yes");
-   }
-   VG_(message)(Vg_UserMsg, "");
-
-   VG_(free) ( VG_AR_PRIVATE, vglc_shadows );
-   VG_(free) ( VG_AR_PRIVATE, vglc_reachedness );
-}
-
-
-/* ---------------------------------------------------------------------
-   Sanity check machinery (permanently engaged).
-   ------------------------------------------------------------------ */
-
-/* Check that nobody has spuriously claimed that the first or last 16
-   pages (64 KB) of address space have become accessible.  Failure of
-   the following do not per se indicate an internal consistency
-   problem, but they are so likely to that we really want to know
-   about it if so. */
-
-Bool VG_(first_and_last_secondaries_look_plausible) ( void )
-{
-   if (IS_DISTINGUISHED_SM(VG_(primary_map)[0])
-       && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) {
-      return True;
-   } else {
-      return False;
-   }
-}
-
-
-/* A fast sanity check -- suitable for calling circa once per
-   millisecond. */
-
-void VG_(do_sanity_checks) ( Bool force_expensive )
-{
-   Int          i;
-   Bool         do_expensive_checks;
-
-   if (VG_(sanity_level) < 1) return;
-
-   /* --- First do all the tests that we can do quickly. ---*/
-
-   VG_(sanity_fast_count)++;
-
-   /* Check that we haven't overrun our private stack. */
-   for (i = 0; i < 10; i++) {
-      vg_assert(VG_(stack)[i]
-                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
-      vg_assert(VG_(stack)[10000-1-i] 
-                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
-   }
-
-   /* Check stuff pertaining to the memory check system. */
-
-   if (VG_(clo_instrument)) {
-
-      /* Check that nobody has spuriously claimed that the first or
-         last 16 pages of memory have become accessible [...] */
-      vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   }
-
-   /* --- Now some more expensive checks. ---*/
-
-   /* Once every 25 times, check some more expensive stuff. */
-
-   do_expensive_checks = False;
-   if (force_expensive) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) > 1) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) == 1 
-       && (VG_(sanity_fast_count) % 25) == 0)
-      do_expensive_checks = True;
-
-   if (do_expensive_checks) {
-      VG_(sanity_slow_count)++;
-
-#     if 0
-      { void zzzmemscan(void); zzzmemscan(); }
-#     endif
-
-      if ((VG_(sanity_fast_count) % 250) == 0)
-         VG_(sanity_check_tc_tt)();
-
-      if (VG_(clo_instrument)) {
-         /* Make sure nobody changed the distinguished secondary. */
-         for (i = 0; i < 8192; i++)
-            vg_assert(vg_distinguished_secondary_map.abits[i] 
-                      == VGM_BYTE_INVALID);
-         for (i = 0; i < 65536; i++)
-            vg_assert(vg_distinguished_secondary_map.vbyte[i] 
-                      == VGM_BYTE_INVALID);
-
-         /* Make sure that the upper 3/4 of the primary map hasn't
-            been messed with. */
-         for (i = 65536; i < 262144; i++)
-            vg_assert(VG_(primary_map)[i] 
-                      == & vg_distinguished_secondary_map);
-      }
-      /* 
-      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
-      */
-   }
-
-   if (VG_(sanity_level) > 1) {
-      /* Check sanity of the low-level memory manager.  Note that bugs
-         in the client's code can cause this to fail, so we don't do
-         this check unless specially asked for.  And because it's
-         potentially very expensive. */
-      VG_(mallocSanityCheckAll)();
-   }
-}
-
-
-/* ---------------------------------------------------------------------
-   Debugging machinery (turn on to debug).  Something of a mess.
-   ------------------------------------------------------------------ */
-
-/* Print the value tags on the 8 integer registers & flag reg. */
-
-static void uint_to_bits ( UInt x, Char* str )
-{
-   Int i;
-   Int w = 0;
-   /* str must point to a space of at least 36 bytes. */
-   for (i = 31; i >= 0; i--) {
-      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
-      if (i == 24 || i == 16 || i == 8)
-         str[w++] = ' ';
-   }
-   str[w++] = 0;
-   vg_assert(w == 36);
-}
-
-/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
-   state table. */
-
-void VG_(show_reg_tags) ( void )
-{
-   Char buf1[36];
-   Char buf2[36];
-   UInt z_eax, z_ebx, z_ecx, z_edx, 
-        z_esi, z_edi, z_ebp, z_esp, z_eflags;
-
-   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
-   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
-   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
-   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
-   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
-   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
-   
-   uint_to_bits(z_eflags, buf1);
-   VG_(message)(Vg_DebugMsg, "efl %\n", buf1);
-
-   uint_to_bits(z_eax, buf1);
-   uint_to_bits(z_ebx, buf2);
-   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
-
-   uint_to_bits(z_ecx, buf1);
-   uint_to_bits(z_edx, buf2);
-   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
-
-   uint_to_bits(z_esi, buf1);
-   uint_to_bits(z_edi, buf2);
-   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
-
-   uint_to_bits(z_ebp, buf1);
-   uint_to_bits(z_esp, buf2);
-   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
-}
-
-
-#if 0
-/* For debugging only.  Scan the address space and touch all allegedly
-   addressible words.  Useful for establishing where Valgrind's idea of
-   addressibility has diverged from what the kernel believes. */
-
-static 
-void zzzmemscan_notify_word ( Addr a, UInt w )
-{
-}
-
-void zzzmemscan ( void )
-{
-   Int n_notifies
-      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
-   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
-}
-#endif
-
-
-
-
-#if 0
-static Int zzz = 0;
-
-void show_bb ( Addr eip_next )
-{
-   VG_(printf)("[%4d] ", zzz);
-   VG_(show_reg_tags)( &VG_(m_shadow );
-   VG_(translate) ( eip_next, NULL, NULL, NULL );
-}
-#endif /* 0 */
-
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
+
diff --git a/coregrind/vg_messages.c b/coregrind/vg_messages.c
index 3eaf8cd..b0051bd 100644
--- a/coregrind/vg_messages.c
+++ b/coregrind/vg_messages.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index e32aee8..3fe6032 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -177,7 +177,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW));
@@ -188,7 +188,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] &= ~(1 << (signum % VKI_KNSIG_BPW));
@@ -199,7 +199,7 @@
 {
    if (set == NULL)
       return 0;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return 0;
    signum--;
    if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW)))
@@ -675,22 +675,49 @@
 }
 
 
-Long VG_(atoll36) ( Char* str )
+Long VG_(atoll16) ( Char* str )
 {
    Bool neg = False;
    Long n = 0;
    if (*str == '-') { str++; neg = True; };
    while (True) {
       if (*str >= '0' && *str <= '9') {
-         n = 36*n + (Long)(*str - '0');
+         n = 16*n + (Long)(*str - '0');
       }
       else 
-      if (*str >= 'A' && *str <= 'Z') {
-         n = 36*n + (Long)((*str - 'A') + 10);
+      if (*str >= 'A' && *str <= 'F') {
+         n = 16*n + (Long)((*str - 'A') + 10);
       }
       else 
-      if (*str >= 'a' && *str <= 'z') {
-         n = 36*n + (Long)((*str - 'a') + 10);
+      if (*str >= 'a' && *str <= 'f') {
+         n = 16*n + (Long)((*str - 'a') + 10);
+      }
+      else {
+	break;
+      }
+      str++;
+   }
+   if (neg) n = -n;
+   return n;
+}
+
+Long VG_(atoll36) ( UInt base, Char* str )
+{
+   Bool neg = False;
+   Long n = 0;
+   vg_assert(base >= 2 && base <= 36);
+   if (*str == '-') { str++; neg = True; };
+   while (True) {
+      if (*str >= '0' && *str <=('9' - (10 - base))) {
+         n = base*n + (Long)(*str - '0');
+      }
+      else 
+      if (base > 10 && *str >= 'A' && *str <= ('Z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'A') + 10);
+      }
+      else 
+      if (base > 10 && *str >= 'a' && *str <= ('z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'a') + 10);
       }
       else {
 	break;
@@ -763,9 +790,18 @@
 }
 
 
-void VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
+Char* VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
 {
-   VG_(strncpy_safely)( dest, src, ndest+1 ); 
+   Int i = 0;
+   while (True) {
+      if (i >= ndest) return dest;     /* reached limit */
+      dest[i] = src[i];
+      if (src[i++] == 0) {
+         /* reached NUL;  pad rest with zeroes as required */
+         while (i < ndest) dest[i++] = 0;
+         return dest;
+      }
+   }
 }
 
 
@@ -868,16 +904,22 @@
 }
 
 
-Char* VG_(strdup) ( ArenaId aid, const Char* s )
+/* Inline just for the wrapper VG_(strdup) below */
+__inline__ Char* VG_(arena_strdup) ( ArenaId aid, const Char* s )
 {
-    Int   i;
-    Int   len = VG_(strlen)(s) + 1;
-    Char* res = VG_(malloc) (aid, len);
-    for (i = 0; i < len; i++)
-       res[i] = s[i];
-    return res;
+   Int   i;
+   Int   len = VG_(strlen)(s) + 1;
+   Char* res = VG_(arena_malloc) (aid, len);
+   for (i = 0; i < len; i++)
+      res[i] = s[i];
+   return res;
 }
 
+/* Wrapper to avoid exposing skins to ArenaId's */
+Char* VG_(strdup) ( const Char* s )
+{
+   return VG_(arena_strdup) ( VG_AR_SKIN, s ); 
+}
 
 /* ---------------------------------------------------------------------
    A simple string matching routine, purloined from Hugs98.
@@ -966,66 +1008,32 @@
    VG_(exit)(1);
 }
 
+void VG_(skin_error) ( Char* str )
+{
+   VG_(printf)("\n%s: misconfigured skin:\n   %s\n\n", VG_(needs).name, str);
+   //VG_(printf)("Please report this bug to me at: %s\n\n", VG_EMAIL_ADDR);
+   VG_(shutdown_logging)();
+   VG_(exit)(1);
+}
+
 
 /* ---------------------------------------------------------------------
    Primitive support for reading files.
    ------------------------------------------------------------------ */
 
 /* Returns -1 on failure. */
-Int VG_(open_read) ( Char* pathname )
-{
+Int VG_(open) ( const Char* pathname, Int flags, Int mode )
+{  
    Int fd;
-   /* VG_(printf)("vg_open_read %s\n", pathname ); */
 
+   /* (old comment, not sure if it still applies  NJN 2002-sep-09) */
    /* This gets a segmentation fault if pathname isn't a valid file.
       I don't know why.  It seems like the call to open is getting
       intercepted and messed with by glibc ... */
    /* fd = open( pathname, O_RDONLY ); */
    /* ... so we go direct to the horse's mouth, which seems to work
       ok: */
-   const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) fd = -1;
-   return fd;
-}
-
-/* Returns -1 on failure. */
-static Int VG_(chmod_u_rw) ( Int fd )
-{
-   Int res;
-   const int O_IRUSR_IWUSR = 000600; /* See /usr/include/cpio.h */
-   res = vg_do_syscall2(__NR_fchmod, fd, O_IRUSR_IWUSR);
-   if (VG_(is_kerror)(res)) res = -1;
-   return res;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(create_and_write) ( Char* pathname )
-{
-   Int fd;
-
-   const int O_CR_AND_WR_ONLY = 0101; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_CR_AND_WR_ONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) {
-      fd = -1;
-   } else {
-      VG_(chmod_u_rw)(fd);
-      if (VG_(is_kerror)(fd)) {
-         fd = -1;
-      }
-   }
-   return fd;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(open_write) ( Char* pathname )
-{  
-   Int fd;
-
-   const int O_WRONLY_AND_TRUNC = 01001; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_WRONLY_AND_TRUNC, 0);
+   fd = vg_do_syscall3(__NR_open, (UInt)pathname, flags, mode);
    /* VG_(printf)("result = %d\n", fd); */
    if (VG_(is_kerror)(fd)) {
       fd = -1;
@@ -1068,7 +1076,7 @@
 /* Misc functions looking for a proper home. */
 
 /* We do getenv without libc's help by snooping around in
-   VG_(client_env) as determined at startup time. */
+   VG_(client_envp) as determined at startup time. */
 Char* VG_(getenv) ( Char* varname )
 {
    Int i, n;
@@ -1266,11 +1274,40 @@
             tot_alloc, nBytes, p, ((char*)p) + nBytes - 1, who );
       return p;
    }
-   VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", 
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): request for %d bytes failed.\n", 
                nBytes);
-   VG_(panic)("vg_get_memory_from_mmap: out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("VG_(get_memory_from_mmap): %d bytes already allocated.\n", 
+               tot_alloc);
+   VG_(printf)("\n");
+   VG_(printf)("This may mean that you have run out of swap space,\n");
+   VG_(printf)("since running programs on valgrind increases their memory\n");
+   VG_(printf)("usage at least 3 times.  You might want to use 'top'\n");
+   VG_(printf)("to determine whether you really have run out of swap.\n");
+   VG_(printf)("If so, you may be able to work around it by adding a\n");
+   VG_(printf)("temporary swap file -- this is easier than finding a\n");
+   VG_(printf)("new swap partition.  Go ask your sysadmin(s) [politely!]\n");
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("\n");
+   VG_(exit)(1);
 }
 
+/* ---------------------------------------------------------------------
+   Generally useful...
+   ------------------------------------------------------------------ */
+
+Int VG_(log2) ( Int x ) 
+{
+   Int i;
+   /* Any more than 32 and we overflow anyway... */
+   for (i = 0; i < 32; i++) {
+      if (1 << i == x) return i;
+   }
+   return -1;
+}
+
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_mylibc.c ---*/
diff --git a/coregrind/vg_procselfmaps.c b/coregrind/vg_procselfmaps.c
index ceba7b3..840f34b 100644
--- a/coregrind/vg_procselfmaps.c
+++ b/coregrind/vg_procselfmaps.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -102,7 +102,7 @@
    UChar  rr, ww, xx, pp, ch;
 
    /* Read the initial memory mapping from the /proc filesystem. */
-   fd = VG_(open_read) ( "/proc/self/maps" );
+   fd = VG_(open) ( "/proc/self/maps", VKI_O_RDONLY, 0 );
    if (fd == -1) {
       VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps");
       VG_(exit)(1);
@@ -172,6 +172,7 @@
        VG_(exit)(1);
 
     read_line_ok:
+
       /* Try and find the name of the file mapped to this segment, if
          it exists. */
       while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++;
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 0ad56b1..b65426b 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -25,13 +25,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
-                         VG_USERREQ__DO_LEAK_CHECK */
+#include "valgrind.h" /* for VG_USERREQ__RUNNING_ON_VALGRIND and
+                             VG_USERREQ__DISCARD_TRANSLATIONS */
 
 /* BORKAGE/ISSUES as of 29 May 02
 
@@ -126,6 +125,10 @@
          happens, this entire record is marked as no longer in use, by
          making the fd field be -1.  */
       Bool     ready; 
+
+      /* The result from SK_(pre_blocking_syscall)();  is passed to
+       * SK_(post_blocking_syscall)(). */
+      void*    pre_result;
    }
    VgWaitedOnFd;
 
@@ -149,12 +152,13 @@
 typedef UInt ThreadKey;
 
 
+UInt VG_(written_shadow_reg);
+
 /* Forwards */
 static void do_client_request ( ThreadId tid );
 static void scheduler_sanity ( void );
 static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );
 
-
 /* ---------------------------------------------------------------------
    Helper functions for the scheduler.
    ------------------------------------------------------------------ */
@@ -181,11 +185,12 @@
 
 
 /* For constructing error messages only: try and identify a thread
-   whose stack this address currently falls within, or return
-   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
-   with any currently VG_(baseBlock)-resident thread. 
+   whose stack satisfies the predicate p, or return VG_INVALID_THREADID
+   if none do.  A small complication is dealing with any currently
+   VG_(baseBlock)-resident thread. 
 */
-ThreadId VG_(identify_stack_addr)( Addr a )
+ThreadId VG_(any_matching_thread_stack)
+              ( Bool (*p) ( Addr stack_min, Addr stack_max ))
 {
    ThreadId tid, tid_to_skip;
 
@@ -195,8 +200,8 @@
       VG_(baseBlock). */
    if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
       tid = vg_tid_currently_in_baseBlock;
-      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
-          && a <= VG_(threads)[tid].stack_highest_word) 
+      if ( p ( VG_(baseBlock)[VGOFF_(m_esp)], 
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
       else
          tid_to_skip = tid;
@@ -205,8 +210,8 @@
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       if (VG_(threads)[tid].status == VgTs_Empty) continue;
       if (tid == tid_to_skip) continue;
-      if (VG_(threads)[tid].m_esp <= a 
-          && a <= VG_(threads)[tid].stack_highest_word)
+      if ( p ( VG_(threads)[tid].m_esp,
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
    }
    return VG_INVALID_THREADID;
@@ -238,14 +243,16 @@
                   VG_(threads)[i].associated_mx,
                   VG_(threads)[i].associated_cv );
       VG_(pp_ExeContext)( 
-         VG_(get_ExeContext)( False, VG_(threads)[i].m_eip, 
-                                     VG_(threads)[i].m_ebp ));
+         VG_(get_ExeContext2)( VG_(threads)[i].m_eip, VG_(threads)[i].m_ebp,
+                               VG_(threads)[i].m_esp, 
+                               VG_(threads)[i].stack_highest_word)
+      );
    }
    VG_(printf)("\n");
 }
 
 static
-void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
+void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no, void* pre_res )
 {
    Int i;
 
@@ -265,6 +272,7 @@
    vg_waiting_fds[i].tid        = tid;
    vg_waiting_fds[i].ready      = False;
    vg_waiting_fds[i].syscall_no = syscall_no;
+   vg_waiting_fds[i].pre_result = pre_res;
 }
 
 
@@ -325,7 +333,7 @@
                        ( trans_addr, trans_size );
    tte.mru_epoch  = VG_(current_epoch);
    /* Free the intermediary -- was allocated by VG_(emit_code). */
-   VG_(jitfree)( (void*)trans_addr );
+   VG_(arena_free)( VG_AR_JITTER, (void*)trans_addr );
    /* Add to trans tab and set back pointer. */
    VG_(add_to_trans_tab) ( &tte );
    /* Update stats. */
@@ -353,6 +361,11 @@
    /*NOTREACHED*/
 }
 
+ThreadState* VG_(get_ThreadState)( ThreadId tid )
+{
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   return & VG_(threads)[tid];
+}
 
 ThreadState* VG_(get_current_thread_state) ( void )
 {
@@ -367,6 +380,15 @@
    return vg_tid_currently_in_baseBlock;
 }
 
+ThreadId VG_(get_current_tid_1_if_root) ( void )
+{
+   if (0 == vg_tid_currently_in_baseBlock)
+      return 1;     /* root thread */
+    
+   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
+   return vg_tid_currently_in_baseBlock;
+}
+
 
 /* Copy the saved state of a thread into VG_(baseBlock), ready for it
    to be run. */
@@ -390,15 +412,31 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
 
-   VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
-   VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
-   VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
-   VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
-   VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
-   VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
-   VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
-   VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   if (VG_(needs).shadow_regs) {
+      VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
+      VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
+      VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
+      VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
+      VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
+      VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
+      VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
+      VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   } else {
+      /* Fields shouldn't be used -- check their values haven't changed. */
+      /* Nb: they are written to by some macros like SET_EDX, but they
+       *     should just write VG_UNUSED_SHADOW_REG_VALUE. */
+      vg_assert(
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eax &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ecx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eflags);
+   }
 
    vg_tid_currently_in_baseBlock = tid;
 }
@@ -432,15 +470,28 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
 
-   VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
-   VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
-   VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
-   VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
-   VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
-   VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   if (VG_(needs).shadow_regs) {
+      VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
+      VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
+      VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
+      VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
+      VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
+      VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
+      VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
+      VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
+      VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   } else {
+      /* Fill with recognisable junk */
+      VG_(threads)[tid].sh_eax =
+      VG_(threads)[tid].sh_ebx =
+      VG_(threads)[tid].sh_ecx =
+      VG_(threads)[tid].sh_edx =
+      VG_(threads)[tid].sh_esi =
+      VG_(threads)[tid].sh_edi =
+      VG_(threads)[tid].sh_ebp =
+      VG_(threads)[tid].sh_esp = 
+      VG_(threads)[tid].sh_eflags = VG_UNUSED_SHADOW_REG_VALUE;
+   }
 
    /* Fill it up with junk. */
    VG_(baseBlock)[VGOFF_(m_eax)] = junk;
@@ -491,7 +542,7 @@
    vg_assert(!VG_(scheduler_jmpbuf_valid));
 
    VG_(save_thread_state) ( tid );
-   VGP_POPCC;
+   VGP_POPCC(VgpRun);
    return trc;
 }
 
@@ -566,14 +617,18 @@
 
    if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1)
        || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2) 
-       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)) {
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_4)) {
       /* Jolly good! */
    } else {
-      VG_(printf)("%%esp at startup = %p is not near %p, %p or %p; aborting\n", 
-                  (void*)startup_esp, 
-                  (void*)VG_STARTUP_STACK_BASE_1,
-                  (void*)VG_STARTUP_STACK_BASE_2,
-                  (void*)VG_STARTUP_STACK_BASE_3 );
+      VG_(printf)(
+         "%%esp at startup = %p is not near %p, %p, %p or %p; aborting\n", 
+         (void*)startup_esp, 
+         (void*)VG_STARTUP_STACK_BASE_1,
+         (void*)VG_STARTUP_STACK_BASE_2,
+         (void*)VG_STARTUP_STACK_BASE_3,
+         (void*)VG_STARTUP_STACK_BASE_4 
+      );
       VG_(panic)("unexpected %esp at startup");
    }
 
@@ -751,11 +806,12 @@
 static
 void sched_do_syscall ( ThreadId tid )
 {
-   UInt saved_eax;
-   UInt res, syscall_no;
-   UInt fd;
-   Bool orig_fd_blockness;
-   Char msg_buf[100];
+   UInt  saved_eax;
+   UInt  res, syscall_no;
+   UInt  fd;
+   void* pre_res;
+   Bool  orig_fd_blockness;
+   Char  msg_buf[100];
 
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
@@ -797,12 +853,13 @@
 
    /* Deal with error case immediately. */
    if (!fd_is_valid(fd)) {
-      VG_(message)(Vg_UserMsg, 
-         "Warning: invalid file descriptor %d in syscall %s",
-         fd, syscall_no == __NR_read ? "read()" : "write()" );
-      VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+            "Warning: invalid file descriptor %d in syscall %s",
+            fd, syscall_no == __NR_read ? "read()" : "write()" );
+      pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
       KERNEL_DO_SYSCALL(tid, res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
       return;
@@ -813,7 +870,7 @@
    orig_fd_blockness = fd_is_blockful(fd);
    set_fd_nonblocking(fd);
    vg_assert(!fd_is_blockful(fd));
-   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+   pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
 
    /* This trashes the thread's %eax; we have to preserve it. */
    saved_eax = VG_(threads)[tid].m_eax;
@@ -834,7 +891,7 @@
              the I/O completion -- the client is.  So don't file a 
              completion-wait entry. 
       */
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
@@ -848,7 +905,8 @@
       /* Put this fd in a table of fds on which we are waiting for
          completion. The arguments for select() later are constructed
          from this table.  */
-      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
+      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */,
+                     pre_res);
       /* Deschedule thread until an I/O completion happens. */
       VG_(threads)[tid].status = VgTs_WaitFD;
       if (VG_(clo_trace_sched)) {
@@ -1042,6 +1100,7 @@
 void complete_blocked_syscalls ( void )
 {
    Int      fd, i, res, syscall_no;
+   void*    pre_res;
    ThreadId tid;
    Char     msg_buf[100];
 
@@ -1069,6 +1128,8 @@
       syscall_no = vg_waiting_fds[i].syscall_no;
       vg_assert(syscall_no == VG_(threads)[tid].m_eax);
 
+      pre_res = vg_waiting_fds[i].pre_result;
+
       /* In a rare case pertaining to writing into a pipe, write()
          will block when asked to write > 4096 bytes even though the
          kernel claims, when asked via select(), that blocking will
@@ -1086,7 +1147,7 @@
       }
 
       KERNEL_DO_SYSCALL(tid,res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
 
       /* Reschedule. */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1125,7 +1186,7 @@
    struct vki_timespec req;
    struct vki_timespec rem;
    req.tv_sec = 0;
-   req.tv_nsec = 20 * 1000 * 1000;
+   req.tv_nsec = 10 * 1000 * 1000;
    res = VG_(nanosleep)( &req, &rem );   
    vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
 }
@@ -1266,6 +1327,8 @@
       if (0)
          VG_(printf)("SCHED: tid %d\n", tid);
 
+      VG_TRACK( thread_run, tid );
+
       /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
          that it decrements the counter before testing it for zero, so
          that if VG_(dispatch_ctr) is set to N you get at most N-1
@@ -1326,7 +1389,8 @@
                = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
             if (trans_addr == (Addr)0) {
                /* Not found; we need to request a translation. */
-               create_translation_for( tid, VG_(threads)[tid].m_eip ); 
+               create_translation_for( 
+                  tid, VG_(threads)[tid].m_eip ); 
                trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip ); 
                if (trans_addr == (Addr)0)
                   VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
@@ -1382,8 +1446,13 @@
                If not valgrinding (cachegrinding, etc) don't do this.
                __libc_freeres does some invalid frees which crash
                the unprotected malloc/free system. */
+
+            /* If __NR_exit, remember the supplied argument. */
+            if (VG_(threads)[tid].m_eax == __NR_exit)
+               VG_(exitcode) = VG_(threads)[tid].m_ebx; /* syscall arg1 */
+
             if (VG_(threads)[tid].m_eax == __NR_exit 
-                && !VG_(clo_instrument)) {
+                && ! VG_(needs).run_libc_freeres) {
                if (VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; quitting");
@@ -1392,7 +1461,7 @@
             }
 
             if (VG_(threads)[tid].m_eax == __NR_exit) {
-               vg_assert(VG_(clo_instrument));
+               vg_assert(VG_(needs).run_libc_freeres);
                if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; running __libc_freeres()");
@@ -1574,10 +1643,10 @@
    vki_ksigset_t irrelevant_sigmask;
    vg_assert(VG_(is_valid_or_empty_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Empty);
-   /* Mark its stack no-access */
-   if (VG_(clo_instrument) && tid != 1)
-      VGM_(make_noaccess)( VG_(threads)[tid].stack_base,
-                           VG_(threads)[tid].stack_size );
+   /* Its stack is now off-limits */
+   VG_TRACK( die_mem_stack, VG_(threads)[tid].stack_base,
+                            VG_(threads)[tid].stack_size );
+
    /* Forget about any pending signals directed specifically at this
       thread, and get rid of signal handlers specifically arranged for
       this thread. */
@@ -1620,17 +1689,14 @@
       thread_return = VG_(threads)[jnr].joiner_thread_return;
       if (thread_return != NULL) {
          /* CHECK thread_return writable */
-         if (VG_(clo_instrument)
-             && !VGM_(check_writable)( (Addr)thread_return, 
-                                       sizeof(void*), NULL))
-            VG_(record_pthread_err)( jnr, 
-               "pthread_join: thread_return points to invalid location");
+         VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[jnr],
+                                  "pthread_join: thread_return",
+                                  (Addr)thread_return, sizeof(void*));
 
          *thread_return = VG_(threads)[jee].joinee_retval;
          /* Not really right, since it makes the thread's return value
             appear to be defined even if it isn't. */
-         if (VG_(clo_instrument))
-            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
+         VG_TRACK( post_mem_write, (Addr)thread_return, sizeof(void*) );
       }
 
       /* Joinee is discarded */
@@ -1716,8 +1782,8 @@
    }
    sp--;
    *cu = VG_(threads)[tid].custack[sp];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   // JJJ: no corresponding pre_mem_write check??
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    VG_(threads)[tid].custack_used = sp;
    SET_EDX(tid, 0);
 }
@@ -1884,7 +1950,7 @@
             "set_cancelpend for invalid tid %d", cee);
          print_sched_event(tid, msg_buf);
       }
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cancel: target thread does not exist, or invalid");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -1919,7 +1985,7 @@
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
    if (jee == tid) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: attempt to join to self");
       SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1935,7 +2001,7 @@
        || jee >= VG_N_THREADS
        || VG_(threads)[jee].status == VgTs_Empty) {
       /* Invalid thread to join to. */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: target thread does not exist, or invalid");
       SET_EDX(tid, EINVAL);
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1948,7 +2014,7 @@
       if (VG_(threads)[i].status == VgTs_WaitJoinee
           && VG_(threads)[i].joiner_jee_tid == jee) {
          /* Someone already did join on this thread */
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_join: another thread already "
             "in join-wait for target thread");
          SET_EDX(tid, EINVAL);
@@ -2074,33 +2140,39 @@
                      - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
    }
 
-   VG_(threads)[tid].m_esp 
-      = VG_(threads)[tid].stack_base 
-        + VG_(threads)[tid].stack_size
-        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
+   /* Having got memory to hold the thread's stack:
+      - set %esp as base + size
+      - mark everything below %esp inaccessible
+      - mark redzone at stack end inaccessible
+    */
+   VG_(threads)[tid].m_esp = VG_(threads)[tid].stack_base 
+                           + VG_(threads)[tid].stack_size
+                           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
 
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( VG_(threads)[tid].m_esp, 
-                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
+   VG_TRACK ( die_mem_stack, VG_(threads)[tid].stack_base, 
+                           + new_stk_szb - VG_AR_CLIENT_STACKBASE_REDZONE_SZB);
+   VG_TRACK ( ban_mem_stack, VG_(threads)[tid].m_esp, 
+                             VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
    
-   /* push arg */
-   VG_(threads)[tid].m_esp -= 4;
-   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
-
-   /* push (bogus) return address */
-   VG_(threads)[tid].m_esp -= 4;
+   /* push two args */
+   VG_(threads)[tid].m_esp -= 8;
+   VG_TRACK ( new_mem_stack, (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( pre_mem_write, Vg_CorePThread, & VG_(threads)[tid], 
+                             "new thread: stack",
+                             (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+ 
+   /* push arg and (bogus) return address */
+   * (UInt*)(VG_(threads)[tid].m_esp+4) = (UInt)arg;
    * (UInt*)(VG_(threads)[tid].m_esp) 
       = (UInt)&do__apply_in_new_thread_bogusRA;
 
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( post_mem_write, VG_(threads)[tid].m_esp, 2 * 4 );
 
    /* this is where we start */
    VG_(threads)[tid].m_eip = (UInt)fn;
 
    if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf,
-         "new thread, created by %d", parent_tid );
+      VG_(sprintf)(msg_buf, "new thread, created by %d", parent_tid );
       print_sched_event(tid, msg_buf);
    }
 
@@ -2230,7 +2302,7 @@
 
    /* POSIX doesn't mandate this, but for sanity ... */
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_lock/trylock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2250,7 +2322,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_lock/trylock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2304,6 +2376,9 @@
       /* We get it! [for the first time]. */
       mutex->__m_count = 1;
       mutex->__m_owner = (_pthread_descr)tid;
+
+      VG_TRACK( post_mutex_lock, tid, mutex);
+
       /* return 0 (success). */
       SET_EDX(tid, 0);
    }
@@ -2327,7 +2402,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2347,7 +2422,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_unlock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2356,7 +2431,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0) {
       /* nobody holds it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is not locked");
       SET_EDX(tid, EPERM);
       return;
@@ -2364,7 +2439,7 @@
 
    if ((ThreadId)mutex->__m_owner != tid) {
       /* we don't hold it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is locked by a different thread");
       SET_EDX(tid, EPERM);
       return;
@@ -2384,6 +2459,8 @@
    vg_assert(mutex->__m_count == 1);
    vg_assert((ThreadId)mutex->__m_owner == tid);
 
+   VG_TRACK( post_mutex_unlock, tid, mutex);
+
    /* Release at max one thread waiting on this mutex. */
    release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
 
@@ -2561,7 +2638,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL || cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_wait/timedwait: cond or mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2581,7 +2658,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2590,7 +2667,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0 /* nobody holds it */
        || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is unlocked "
             "or is locked but not owned by thread");
       SET_EDX(tid, EINVAL);
@@ -2636,7 +2713,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_signal/broadcast: cond is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2699,14 +2776,11 @@
    vg_thread_keys[i].destructor = destructor;
 
    /* check key for addressibility */
-   if (VG_(clo_instrument)
-       && !VGM_(check_writable)( (Addr)key, 
-                                 sizeof(pthread_key_t), NULL))
-      VG_(record_pthread_err)( tid, 
-         "pthread_key_create: key points to invalid location");
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid], 
+                            "pthread_key_create: key",
+                            (Addr)key, sizeof(pthread_key_t));
    *key = i;
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );
+   VG_TRACK( post_mem_write, (Addr)key, sizeof(pthread_key_t) );
 
    SET_EDX(tid, 0);
 }
@@ -2726,7 +2800,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
    
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_key_delete: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2760,7 +2834,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_getspecific: key is invalid");
       SET_EDX(tid, (UInt)NULL);
       return;
@@ -2786,7 +2860,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_setspecific: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2814,14 +2888,16 @@
    }
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(key >= 0 && key < VG_N_THREAD_KEYS);
+
+   // JJJ: no pre_mem_write check??
+   
    if (!vg_thread_keys[key].inuse) {
       SET_EDX(tid, -1);
       return;
    }
    cu->fn = vg_thread_keys[key].destructor;
    cu->arg = VG_(threads)[tid].specifics[key];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    SET_EDX(tid, 0);
 }
 
@@ -2852,27 +2928,19 @@
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
 
-   if (VG_(clo_instrument)) {
-      /* check newmask/oldmask are addressible/defined */
-      if (newmask
-          && !VGM_(check_readable)( (Addr)newmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: newmask contains "
-            "unaddressible or undefined bytes");
-      if (oldmask
-          && !VGM_(check_writable)( (Addr)oldmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: oldmask contains "
-            "unaddressible bytes");
-   }
+   if (newmask)
+      VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                              "pthread_sigmask: newmask",
+                              (Addr)newmask, sizeof(vki_ksigset_t));
+   if (oldmask)
+      VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                               "pthread_sigmask: oldmask",
+                               (Addr)oldmask, sizeof(vki_ksigset_t));
 
    VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask );
 
-   if (oldmask && VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)oldmask, sizeof(vki_ksigset_t) );
-   }
+   if (oldmask)
+      VG_TRACK( post_mem_write, (Addr)oldmask, sizeof(vki_ksigset_t) );
 
    /* Success. */
    SET_EDX(tid, 0);
@@ -2924,7 +2992,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!VG_(is_valid_tid)(thread)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_kill: invalid target thread");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -2994,18 +3062,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                           "pthread_atfork: prepare/parent/child",
+                           (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_readable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "pthread_atfork: prepare/parent/child contains "
-            "unaddressible or undefined bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3027,18 +3088,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                            "fork: prepare/parent/child",
+                            (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_writable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "fork: prepare/parent/child contains "
-            "unaddressible bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3046,9 +3100,7 @@
    *fh = vg_fhstack[n];
    SET_EDX(tid, 0);
 
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) );
-   }
+   VG_TRACK( post_mem_write, (Addr)fh, sizeof(ForkHandlerEntry) );
 }
 
 
@@ -3063,9 +3115,9 @@
 static
 void do_client_request ( ThreadId tid )
 {
-#  define RETURN_WITH(vvv)                        \
-       { tst->m_edx = (vvv);                      \
-         tst->sh_edx = VGM_WORD_VALID;            \
+#  define RETURN_WITH(vvv)                      \
+       { tst->m_edx = (vvv);                    \
+         tst->sh_edx = VG_(written_shadow_reg); \
        }
 
    ThreadState* tst    = &VG_(threads)[tid];
@@ -3289,7 +3341,7 @@
          break;
 
       case VG_USERREQ__PTHREAD_ERROR:
-         VG_(record_pthread_err)( tid, (Char*)(arg[1]) );
+         VG_(record_pthread_error)( tid, (Char*)(arg[1]) );
          SET_EDX(tid, 0);
          break;
 
@@ -3311,30 +3363,40 @@
                                      (ForkHandlerEntry*)(arg[2]) );
          break;
 
-      case VG_USERREQ__MAKE_NOACCESS:
-      case VG_USERREQ__MAKE_WRITABLE:
-      case VG_USERREQ__MAKE_READABLE:
-      case VG_USERREQ__DISCARD:
-      case VG_USERREQ__CHECK_WRITABLE:
-      case VG_USERREQ__CHECK_READABLE:
-      case VG_USERREQ__MAKE_NOACCESS_STACK:
-      case VG_USERREQ__DO_LEAK_CHECK:
-      case VG_USERREQ__DISCARD_TRANSLATIONS:
-         SET_EDX(
-            tid, 
-            VG_(handle_client_request) ( &VG_(threads)[tid], arg )
-         );
-	 break;
-
       case VG_USERREQ__SIGNAL_RETURNS: 
          handle_signal_return(tid);
 	 break;
 
+      /* Requests from the client program */
+
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
+         if (VG_(clo_verbosity) > 2)
+            VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
+                         " addr %p,  len %d\n",
+                         (void*)arg[1], arg[2] );
+
+         VG_(invalidate_translations)( arg[1], arg[2] );
+
+         SET_EDX( tid, 0 );     /* return value is meaningless */
+	 break;
+
       default:
-         VG_(printf)("panic'd on client request = 0x%x\n", arg[0] );
-         VG_(panic)("do_client_request: "
-                    "unknown request");
-         /*NOTREACHED*/
+         if (VG_(needs).client_requests) {
+            if (VG_(clo_verbosity) > 2)
+               VG_(printf)("client request: code %d,  addr %p,  len %d\n",
+                           arg[0], (void*)arg[1], arg[2] );
+
+            SET_EDX(tid,
+                    SK_(handle_client_request) ( &VG_(threads)[tid], arg )
+            );
+         } else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled client request: 0x%x.  Perhaps\n" 
+                        "  VG_(needs).client_requests should be set?\n",
+                        arg[0]);
+            VG_(panic)("do_client_request: unknown request");
+            /*NOTREACHED*/
+         }
          break;
    }
 
@@ -3392,7 +3454,7 @@
              && stack_used 
                 >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
             VG_(message)(Vg_UserMsg,
-               "Warning: STACK OVERFLOW: "
+               "Error: STACK OVERFLOW: "
                "thread %d: stack used %d, available %d", 
                i, stack_used, VG_PTHREAD_STACK_MIN );
             VG_(message)(Vg_UserMsg,
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index f58ec11..f849544 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -26,12 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
 #include "vg_include.h"
-#include "vg_constants.h"
 #include "vg_unsafe.h"
 #include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -598,16 +597,18 @@
    return;
 
   bad_signo:
-   VG_(message)(Vg_UserMsg,
-                "Warning: bad signal number %d in __NR_sigaction.", 
-                signo);
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: bad signal number %d in __NR_sigaction.", 
+                   signo);
    SET_EAX(tid, -VKI_EINVAL);
    return;
 
   bad_sigkill_or_sigstop:
-   VG_(message)(Vg_UserMsg,
-      "Warning: attempt to set %s handler in __NR_sigaction.", 
-      signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+         "Warning: attempt to set %s handler in __NR_sigaction.", 
+         signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
 
    SET_EAX(tid, -VKI_EINVAL);
    return;
@@ -939,11 +940,19 @@
    esp = esp_top_of_frame;
    esp -= sizeof(VgSigFrame);
    frame = (VgSigFrame*)esp;
+
+   /* For tracking memory events, indicate the entire frame has been
+    * allocated, but pretend that only the first four words are written */
+   VG_TRACK( new_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
+
    /* Assert that the frame is placed correctly. */
    vg_assert( (sizeof(VgSigFrame) & 0x3) == 0 );
    vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) 
               == ((Char*)(esp_top_of_frame)) );
 
+   /* retaddr, sigNo, psigInfo, puContext fields are to be written */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst, "signal handler frame", 
+                            (Addr)esp, 16 );
    frame->retaddr    = (UInt)(&VG_(signalreturn_bogusRA));
    frame->sigNo      = sigNo;
    frame->psigInfo   = (Addr)NULL;
@@ -974,14 +983,9 @@
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
-   /* Make retaddr, sigNo, psigInfo, puContext fields readable -- at
-      0(%ESP) .. 12(%ESP) */
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( ((Addr)esp)+0,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+4,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+8,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+12, 4 );
-   }
+   /* retaddr, sigNo, psigInfo, puContext fields have been written -- 
+      at 0(%ESP) .. 12(%ESP) */
+   VG_TRACK( post_mem_write, (Addr)esp, 16 );
 
    /* 
    VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", 
@@ -1021,8 +1025,7 @@
       tst->m_fpu[i] = frame->fpustate[i];
 
    /* Mark the frame structure as nonaccessible. */
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( (Addr)frame, sizeof(VgSigFrame) );
+   VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
 
    /* Restore machine state from the saved context. */
    tst->m_eax     = frame->eax;
@@ -1140,9 +1143,7 @@
          sigwait_args = (UInt*)(tst->m_eax);
          if (NULL != (UInt*)(sigwait_args[2])) {
             *(Int*)(sigwait_args[2]) = sigNo;
-            if (VG_(clo_instrument))
-               VGM_(make_readable)( (Addr)(sigwait_args[2]), 
-                                    sizeof(UInt));
+            VG_TRACK( post_mem_write, (Addr)sigwait_args[2], sizeof(UInt));
          }
 	 SET_EDX(tid, 0);
          tst->status = VgTs_Runnable;
@@ -1194,7 +1195,11 @@
             vg_dcss.dcss_sigpending[sigNo] = False;
             vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
             continue; /* for (sigNo = 1; ...) loop */
-	 }
+	 } else if (VG_(ksigismember)(&(tst->sig_mask), sigNo)) {
+            /* signal blocked in specific thread, so we can't
+               deliver it just now */
+            continue; /* for (sigNo = 1; ...) loop */
+         }
       } else {
          /* not directed to a specific thread, so search for a
             suitable candidate */
diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S
index 63ee590..d6c202e 100644
--- a/coregrind/vg_startup.S
+++ b/coregrind/vg_startup.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index 8330794..728f228 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -37,17 +37,12 @@
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
 
-   Stabs reader greatly improved by Nick Nethercode, Apr 02.
-
-   16 May 02: when notified about munmap, return a Bool indicating
-   whether or not the area being munmapped had executable permissions.
-   This is then used to determine whether or not
-   VG_(invalid_translations) should be called for that area.  In order
-   that this work even if --instrument=no, in this case we still keep
-   track of the mapped executable segments, but do not load any debug
-   info or symbols.
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 */
 
+/* Set to True when first debug info search is performed */
+Bool VG_(using_debug_info) = False;
+
 /*------------------------------------------------------------*/
 /*--- Structs n stuff                                      ---*/
 /*------------------------------------------------------------*/
@@ -126,23 +121,14 @@
    SegInfo;
 
 
-/* -- debug helper -- */
-static void ppSegInfo ( SegInfo* si )
-{
-   VG_(printf)("name: %s\n"
-               "start %p, size %d, foffset %d\n",
-               si->filename?si->filename : (UChar*)"NULL",
-               si->start, si->size, si->foffset );
-}
-
 static void freeSegInfo ( SegInfo* si )
 {
    vg_assert(si != NULL);
-   if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
-   if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
-   if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
-   if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
-   VG_(free)(VG_AR_SYMTAB, si);
+   if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
+   if (si->symtab)   VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
+   if (si->loctab)   VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
+   if (si->strtab)   VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
+   VG_(arena_free)(VG_AR_SYMTAB, si);
 }
 
 
@@ -151,23 +137,54 @@
 /*------------------------------------------------------------*/
 
 /* Add a str to the string table, including terminating zero, and
-   return offset of the string in vg_strtab. */
+   return offset of the string in vg_strtab.  Unless it's been seen
+   recently, in which case we find the old index and return that.
+   This avoids the most egregious duplications. */
 
 static __inline__
 Int addStr ( SegInfo* si, Char* str )
 {
+#  define EMPTY    0xffffffff
+#  define NN       5
+   
+   /* prevN[0] has the most recent, prevN[NN-1] the least recent */
+   static UInt     prevN[] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
+   static SegInfo* curr_si = NULL;
+
    Char* new_tab;
    Int   new_sz, i, space_needed;
-   
+
+   /* Avoid gratuitous duplication:  if we saw `str' within the last NN,
+    * within this segment, return that index.  Saves about 200KB in glibc,
+    * extra time taken is too small to measure.  --NJN 2002-Aug-30 */
+   if (curr_si == si) {
+      for (i = NN-1; i >= 0; i--) {
+         if (EMPTY != prevN[i] &&
+             (0 == VG_(strcmp)(str, &si->strtab[prevN[i]]))) {
+            return prevN[i];
+         }
+      }
+   } else {
+      /* New segment */
+      curr_si = si;
+      for (i = 0; i < 5; i++) prevN[i] = EMPTY;
+   }
+   /* Shuffle prevous ones along, put new one in. */
+   for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
+   prevN[0] = si->strtab_used;
+
+#  undef EMPTY
+
    space_needed = 1 + VG_(strlen)(str);
+
    if (si->strtab_used + space_needed > si->strtab_size) {
       new_sz = 2 * si->strtab_size;
       if (new_sz == 0) new_sz = 5000;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
       if (si->strtab != NULL) {
          for (i = 0; i < si->strtab_used; i++)
             new_tab[i] = si->strtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->strtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
       }
       si->strtab      = new_tab;
       si->strtab_size = new_sz;
@@ -178,6 +195,7 @@
 
    si->strtab_used += space_needed;
    vg_assert(si->strtab_used <= si->strtab_size);
+
    return si->strtab_used - space_needed;
 }
 
@@ -195,11 +213,11 @@
    if (si->symtab_used == si->symtab_size) {
       new_sz = 2 * si->symtab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
       if (si->symtab != NULL) {
          for (i = 0; i < si->symtab_used; i++)
             new_tab[i] = si->symtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->symtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
       }
       si->symtab = new_tab;
       si->symtab_size = new_sz;
@@ -224,11 +242,11 @@
    if (si->loctab_used == si->loctab_size) {
       new_sz = 2 * si->loctab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
       if (si->loctab != NULL) {
          for (i = 0; i < si->loctab_used; i++)
             new_tab[i] = si->loctab[i];
-         VG_(free)(VG_AR_SYMTAB, si->loctab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
       }
       si->loctab = new_tab;
       si->loctab_size = new_sz;
@@ -732,8 +750,7 @@
                      next_addr = (UInt)stab[i+1].n_value;
                      break;
 
-                  /* Boring one: skip, look for something more
-                     useful. */
+                  /* Boring one: skip, look for something more useful. */
                   case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC: 
                   case N_STSYM: case N_LCSYM: case N_GSYM:
                      i++;
@@ -1006,10 +1023,10 @@
       ++ state_machine_regs.last_file_entry;
       name = data;
       if (*fnames == NULL)
-        *fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+        *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
       else
-        *fnames = VG_(realloc)(
-                     VG_AR_SYMTAB, *fnames, 
+        *fnames = VG_(arena_realloc)(
+                     VG_AR_SYMTAB, *fnames, /*alignment*/4,
                      sizeof(UInt) 
                         * (state_machine_regs.last_file_entry + 1));
       (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
@@ -1136,9 +1153,9 @@
 		semantics, we need to malloc the first time. */
 
              if (fnames == NULL)
-               fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+               fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
              else
-               fnames = VG_(realloc)(VG_AR_SYMTAB, fnames, 
+               fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
                            sizeof(UInt) 
                               * (state_machine_regs.last_file_entry + 1));
              data += VG_(strlen) ((Char *) data) + 1;
@@ -1281,7 +1298,7 @@
              break;
            }
        }
-      VG_(free)(VG_AR_SYMTAB, fnames);
+      VG_(arena_free)(VG_AR_SYMTAB, fnames);
       fnames = NULL;
     }
 }
@@ -1327,7 +1344,7 @@
    }
    n_oimage = stat_buf.st_size;
 
-   fd = VG_(open_read)(si->filename);
+   fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
    if (fd == -1) {
       vg_symerr("Can't open .so/.exe to read symbols?!");
       return;
@@ -1650,8 +1667,7 @@
 static SegInfo* segInfo = NULL;
 
 
-static
-void read_symtab_callback ( 
+void VG_(read_symtab_callback) ( 
         Addr start, UInt size, 
         Char rr, Char ww, Char xx, 
         UInt foffset, UChar* filename )
@@ -1686,14 +1702,14 @@
    }
 
    /* Get the record initialised right. */
-   si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
+   si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
    si->next = segInfo;
    segInfo = si;
 
    si->start    = start;
    si->size     = size;
    si->foffset  = foffset;
-   si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
+   si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
    VG_(strcpy)(si->filename, filename);
 
    si->symtab = NULL;
@@ -1704,15 +1720,12 @@
    si->strtab_size = si->strtab_used = 0;
 
    /* Kludge ... */
-   si->offset 
-      = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
+   si->offset = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      vg_read_lib_symbols ( si );
-      canonicaliseSymtab ( si );
-      canonicaliseLoctab ( si );
-   }
+   vg_read_lib_symbols ( si );
+   canonicaliseSymtab ( si );
+   canonicaliseLoctab ( si );
 }
 
 
@@ -1724,57 +1737,29 @@
    libraries as they are dlopen'd.  Conversely, when the client does
    munmap(), vg_symtab_notify_munmap() throws away any symbol tables
    which happen to correspond to the munmap()d area.  */
-void VG_(read_symbols) ( void )
+void VG_(maybe_read_symbols) ( void )
 {
-   VG_(read_procselfmaps) ( read_symtab_callback );
+   if (!VG_(using_debug_info))
+      return;
 
-   /* Do a sanity check on the symbol tables: ensure that the address
-      space pieces they cover do not overlap (otherwise we are severely
-      hosed).  This is a quadratic algorithm, but there shouldn't be
-      many of them.  
-   */
-   { SegInfo *si, *si2;
-     for (si = segInfo; si != NULL; si = si->next) {
-        /* Check no overlap between *si and those in the rest of the
-           list. */
-        for (si2 = si->next; si2 != NULL; si2 = si2->next) {
-           Addr lo = si->start;
-           Addr hi = si->start + si->size - 1;
-           Addr lo2 = si2->start;
-           Addr hi2 = si2->start + si2->size - 1;
-           Bool overlap;
-           vg_assert(lo < hi);
-	   vg_assert(lo2 < hi2);
-           /* the main assertion */
-           overlap = (lo <= lo2 && lo2 <= hi)
-                      || (lo <= hi2 && hi2 <= hi);
-	   if (overlap) {
-              VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
-              ppSegInfo ( si );
-              ppSegInfo ( si2 );
-              VG_(printf)("\n\n"); 
-              vg_assert(! overlap);
-	   }
-        }
-     }
-   }    
+   VGP_PUSHCC(VgpReadSyms);
+      VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
+   VGP_POPCC(VgpReadSyms);
 }
 
-
 /* When an munmap() call happens, check to see whether it corresponds
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
    maintain the no-overlapping invariant.
-
-   16 May 02: Returns a Bool indicating whether or not the discarded
-   range falls inside a known executable segment.  See comment at top
-   of file for why.
 */
-Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
+void VG_(maybe_unload_symbols) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
+   if (!VG_(using_debug_info))
+      return;
+
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1784,7 +1769,7 @@
       curr = curr->next;
    }
    if (curr == NULL) 
-      return False;
+      return;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1799,7 +1784,7 @@
    }
 
    freeSegInfo(curr);
-   return True;
+   return;
 }
 
 
@@ -1808,13 +1793,22 @@
 /*--- plausible-looking stack dumps.                       ---*/
 /*------------------------------------------------------------*/
 
+static __inline__ void ensure_debug_info_inited ( void )
+{
+   if (!VG_(using_debug_info)) {
+      VG_(using_debug_info) = True;
+      VG_(maybe_read_symbols)();
+   }
+}
+
 /* Find a symbol-table index containing the specified pointer, or -1
    if not found.  Binary search.  */
 
-static Int search_one_symtab ( SegInfo* si, Addr ptr )
+static Int search_one_symtab ( SegInfo* si, Addr ptr,
+                               Bool match_anywhere_in_fun )
 {
    Addr a_mid_lo, a_mid_hi;
-   Int  mid, 
+   Int  mid, size, 
         lo = 0, 
         hi = si->symtab_used-1;
    while (True) {
@@ -1822,7 +1816,10 @@
       if (lo > hi) return -1; /* not found */
       mid      = (lo + hi) / 2;
       a_mid_lo = si->symtab[mid].addr;
-      a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
+      size = ( match_anywhere_in_fun
+             ? si->symtab[mid].size
+             : 1);
+      a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
 
       if (ptr < a_mid_lo) { hi = mid-1; continue; } 
       if (ptr > a_mid_hi) { lo = mid+1; continue; }
@@ -1836,21 +1833,29 @@
    *psi to the relevant SegInfo, and *symno to the symtab entry number
    within that.  If not found, *psi is set to NULL.  */
 
-static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
+static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi, 
+                                           /*OUT*/Int* symno,
+                                 Bool match_anywhere_in_fun )
 {
    Int      sno;
    SegInfo* si;
+
+   ensure_debug_info_inited();
+   VGP_PUSHCC(VgpSearchSyms);
+   
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
-         sno = search_one_symtab ( si, ptr );
+         sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
          if (sno == -1) goto not_found;
          *symno = sno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
@@ -1882,54 +1887,84 @@
    *psi to the relevant SegInfo, and *locno to the loctab entry number
    within that.  If not found, *psi is set to NULL.
 */
-static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
+static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
+                                           /*OUT*/Int* locno )
 {
    Int      lno;
    SegInfo* si;
+
+   VGP_PUSHCC(VgpSearchSyms);
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
          lno = search_one_loctab ( si, ptr );
          if (lno == -1) goto not_found;
          *locno = lno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
 /* The whole point of this whole big deal: map a code address to a
    plausible symbol name.  Returns False if no idea; otherwise True.
-   Caller supplies buf and nbuf.  If no_demangle is True, don't do
+   Caller supplies buf and nbuf.  If demangle is False, don't do
    demangling, regardless of vg_clo_demangle -- probably because the
    call has come from vg_what_fn_or_object_is_this. */
-Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a, 
-                            Char* buf, Int nbuf )
+static
+Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
+                  Bool match_anywhere_in_fun )
 {
    SegInfo* si;
    Int      sno;
-   search_all_symtabs ( a, &si, &sno );
+   search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
    if (si == NULL) 
       return False;
-   if (no_demangle) {
+   if (demangle) {
+      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
+   } else {
       VG_(strncpy_safely) 
          ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
-   } else {
-      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
    }
    return True;
 }
 
+/* This is available to skins... always demangle C++ names */
+Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
 
-/* Map a code address to the name of a shared object file.  Returns
-   False if no idea; otherwise False.  Caller supplies buf and
-   nbuf. */
-static
-Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
+/* This is available to skins... always demangle C++ names,
+   only succeed if 'a' matches first instruction of function. */
+Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/False );
+}
+
+/* This is only available to core... don't demangle C++ names */
+Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/False, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
+
+/* Map a code address to the name of a shared object file or the executable.
+   Returns False if no idea; otherwise True.  Doesn't require debug info.
+   Caller supplies buf and nbuf. */
+Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
 {
    SegInfo* si;
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= a && a < si->start+si->size) {
          VG_(strncpy_safely)(buf, si->filename, nbuf);
@@ -1939,27 +1974,39 @@
    return False;
 }
 
-/* Return the name of an erring fn in a way which is useful
-   for comparing against the contents of a suppressions file. 
-   Always writes something to buf.  Also, doesn't demangle the
-   name, because we want to refer to mangled names in the 
-   suppressions file.
-*/
-void VG_(what_obj_and_fun_is_this) ( Addr a,
-                                     Char* obj_buf, Int n_obj_buf,
-                                     Char* fun_buf, Int n_fun_buf )
+
+/* Map a code address to a filename.  Returns True if successful.  */
+Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
 {
-   (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
-   (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], 
+                       n_filename);
+   return True;
 }
 
+/* Map a code address to a line number.  Returns True if successful. */
+Bool VG_(get_linenum)( Addr a, UInt* lineno )
+{
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   *lineno = si->loctab[locno].lineno;
+
+   return True;
+}
 
 /* Map a code address to a (filename, line number) pair.  
    Returns True if successful.
 */
-Bool VG_(what_line_is_this)( Addr a, 
-                             UChar* filename, Int n_filename, 
-                             UInt* lineno )
+Bool VG_(get_filename_linenum)( Addr a, 
+                                Char* filename, Int n_filename, 
+                                UInt* lineno )
 {
    SegInfo* si;
    Int      locno;
@@ -2001,11 +2048,13 @@
 
    n = 0;
 
-   know_fnname  = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
-   know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
-   know_srcloc  = VG_(what_line_is_this)(ec->eips[0], 
-                                         buf_srcloc, M_VG_ERRTXT, 
-                                         &lineno);
+   // SSS: factor this repeated code out!
+
+   know_fnname  = VG_(get_fnname) (ec->eips[0], buf_fn,  M_VG_ERRTXT);
+   know_objname = VG_(get_objname)(ec->eips[0], buf_obj, M_VG_ERRTXT);
+   know_srcloc  = VG_(get_filename_linenum)(ec->eips[0], 
+                                            buf_srcloc, M_VG_ERRTXT, 
+                                            &lineno);
 
    APPEND("   at ");
    VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
@@ -2035,11 +2084,11 @@
    VG_(message)(Vg_UserMsg, "%s", buf);
 
    for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
-      know_fnname  = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
-      know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
-      know_srcloc  = VG_(what_line_is_this)(ec->eips[i], 
-                                          buf_srcloc, M_VG_ERRTXT, 
-                                          &lineno);
+      know_fnname  = VG_(get_fnname) (ec->eips[i], buf_fn,  M_VG_ERRTXT);
+      know_objname = VG_(get_objname)(ec->eips[i], buf_obj, M_VG_ERRTXT);
+      know_srcloc  = VG_(get_filename_linenum)(ec->eips[i], 
+                                               buf_srcloc, M_VG_ERRTXT, 
+                                               &lineno);
       n = 0;
       APPEND("   by ");
       VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S
index adabbed..52d6091 100644
--- a/coregrind/vg_syscall.S
+++ b/coregrind/vg_syscall.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
new file mode 100644
index 0000000..a500deb
--- /dev/null
+++ b/coregrind/vg_syscalls.c
@@ -0,0 +1,3164 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Update the byte permission maps following a system call.     ---*/
+/*---                                                vg_syscalls.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+/* vg_unsafe.h should NOT be included into any file except this
+   one. */
+#include "vg_unsafe.h"
+
+
+/* All system calls are channelled through here, doing two things:
+
+   * notify the skin of the memory events (reads, writes) happening
+
+   * perform the syscall, usually by passing it along to the kernel
+     unmodified.  However, because we simulate signals ourselves,
+     signal-related syscalls are routed to vg_signal.c, and are not
+     delivered to the kernel.
+
+   A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S
+   does the tricky bit of passing a syscall to the kernel, whilst
+   having the simulator retain control.
+*/
+
+#define SYSCALL_TRACK(fn, args...)  VG_TRACK(fn, Vg_CoreSysCall, ## args)
+
+#define MAYBE_PRINTF(format, args...)  \
+   if (VG_(clo_trace_syscalls))        \
+      VG_(printf)(format, ## args)
+
+/* ---------------------------------------------------------------------
+   Doing mmap, munmap, mremap, mprotect
+   ------------------------------------------------------------------ */
+
+// Nb: this isn't done as precisely as possible, but it seems that programs
+// are usually sufficiently well-behaved that the more obscure corner cases
+// aren't important.  Various comments in the few functions below give more
+// details... njn 2002-Sep-17
+
+/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
+   munmap, mprotect (and mremap??) work at the page level.  So addresses
+   and lengths must be adjusted for this. */
+
+/* Mash around start and length so that the area exactly covers
+   an integral number of pages.  If we don't do that, memcheck's
+   idea of addressible memory diverges from that of the
+   kernel's, which causes the leak detector to crash. */
+static 
+void mash_addr_and_len( Addr* a, UInt* len)
+{
+   while (( *a         % VKI_BYTES_PER_PAGE) > 0) { (*a)--; (*len)++; }
+   while (((*a + *len) % VKI_BYTES_PER_PAGE) > 0) {         (*len)++; }
+}
+
+static
+void mmap_segment ( Addr a, UInt len, UInt prot, Int fd )
+{
+   Bool nn, rr, ww, xx;
+
+   /* Records segment, reads debug symbols if necessary */
+   if (prot & PROT_EXEC && fd != -1)
+      VG_(new_exe_segment) ( a, len );
+
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   VG_TRACK( new_mem_mmap, a, len, nn, rr, ww, xx );
+}
+
+static
+void munmap_segment ( Addr a, UInt len )
+{
+   /* Addr orig_a   = a;
+      Addr orig_len = len; */
+
+   mash_addr_and_len(&a, &len);
+   /*
+   VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", 
+      orig_a, orig_len, a, len, (orig_a!=a || orig_len!=len) 
+                                    ? "CHANGE" : "");
+   */
+
+   /* Invalidate translations as necessary (also discarding any basic
+      block-specific info retained by the skin) and unload any debug
+      symbols. */
+   // This doesn't handle partial unmapping of exe segs correctly, if that
+   // ever happens...
+   VG_(remove_if_exe_segment) ( a, len );
+
+   VG_TRACK( die_mem_munmap, a, len );
+}
+
+static 
+void mprotect_segment ( Addr a, UInt len, Int prot )
+{
+   Bool nn, rr, ww, xx;
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   // if removing exe permission, should check and remove from exe_seg list
+   // if adding, should check and add to exe_seg list
+   // easier to ignore both cases -- both v. unlikely?
+   mash_addr_and_len(&a, &len);
+   VG_TRACK( change_mem_mprotect, a, len, nn, rr, ww, xx );
+}
+
+static 
+void mremap_segment ( old_addr, old_size, new_addr, new_size )
+{
+   /* If the block moves, assume new and old blocks can't overlap; seems to
+    * be valid judging from Linux kernel code in mm/mremap.c */
+   vg_assert(old_addr == new_addr         ||
+             old_addr+old_size < new_addr ||
+             new_addr+new_size < old_addr);
+
+   if (new_size < old_size) {
+      // if exe_seg
+      //    unmap old symbols from old_addr+new_size..old_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, new_size );
+      VG_TRACK( die_mem_munmap, old_addr+new_size, old_size-new_size );
+
+   } else {
+      // if exe_seg
+      //    map new symbols from new_addr+old_size..new_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, old_size );
+      // what should the permissions on the new extended part be??
+      // using 'rwx'
+      VG_TRACK( new_mem_mmap,   new_addr+old_size, new_size-old_size,
+                                False, True, True, True );
+   }
+}
+
+
+/* Is this a Linux kernel error return value? */
+/* From:
+   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
+   linux/i386/sysdep.h?
+   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
+
+   \begin{quote}:
+
+   Linux uses a negative return value to indicate syscall errors,
+   unlike most Unices, which use the condition codes' carry flag.
+
+   Since version 2.1 the return value of a system call might be
+   negative even if the call succeeded.  E.g., the `lseek' system call
+   might return a large offset.  Therefore we must not anymore test
+   for < 0, but test for a real error by making sure the value in %eax
+   is a real error number.  Linus said he will make sure that no syscall
+   returns a value in -1 .. -4095 as a valid result so we can safely
+   test with -4095.  
+
+   END QUOTE
+*/
+Bool VG_(is_kerror) ( Int res )
+{
+   if (res >= -4095 && res <= -1)
+      return True;
+   else
+      return False;
+}
+
+static
+UInt get_shm_size ( Int shmid )
+{
+   struct shmid_ds buf;
+   long __res;
+    __asm__ volatile ( "int $0x80"
+                       : "=a" (__res)
+                       : "0" (__NR_ipc),
+                         "b" ((long)(24) /*IPCOP_shmctl*/),
+                         "c" ((long)(shmid)),
+                         "d" ((long)(IPC_STAT)),
+                         "S" ((long)(0)),
+                         "D" ((long)(&buf)) );
+    if ( VG_(is_kerror) ( __res ) )
+       return 0;
+ 
+   return buf.shm_segsz;
+}
+ 
+static
+Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
+{
+   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
+   Char *result = VG_(arena_malloc) ( aid, len );
+   VG_(strcpy) ( result, s1 );
+   VG_(strcat) ( result, s2 );
+   return result;
+}
+
+static 
+void pre_mem_read_sendmsg ( ThreadState* tst, 
+                            Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, base, size );
+
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static 
+void pre_mem_write_recvmsg ( ThreadState* tst, 
+                             Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_write, tst, outmsg, base, size );
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static
+void post_mem_write_recvmsg ( ThreadState* tst,
+                              Char *fieldName, UInt base, UInt size )
+{
+   VG_TRACK( post_mem_write, base, size );
+}
+ 
+static
+void msghdr_foreachfield ( 
+        ThreadState* tst, 
+        struct msghdr *msg, 
+        void (*foreach_func)( ThreadState*, Char *, UInt, UInt ) 
+     )
+{
+   if ( !msg )
+      return;
+
+   foreach_func ( tst, "(msg)", (Addr)msg, sizeof( struct msghdr ) );
+
+   if ( msg->msg_name )
+      foreach_func ( tst, 
+                     "(msg.msg_name)", 
+                     (Addr)msg->msg_name, msg->msg_namelen );
+
+   if ( msg->msg_iov ) {
+      struct iovec *iov = msg->msg_iov;
+      UInt i;
+
+      foreach_func ( tst, 
+                     "(msg.msg_iov)", 
+                     (Addr)iov, msg->msg_iovlen * sizeof( struct iovec ) );
+
+      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
+         foreach_func ( tst, 
+                        "(msg.msg_iov[i]", 
+                        (Addr)iov->iov_base, iov->iov_len );
+   }
+
+   if ( msg->msg_control )
+      foreach_func ( tst, 
+                     "(msg.msg_control)", 
+                     (Addr)msg->msg_control, msg->msg_controllen );
+}
+
+static
+void pre_mem_read_sockaddr ( ThreadState* tst,
+                                 Char *description,
+                                 struct sockaddr *sa, UInt salen )
+{
+   Char *outmsg = VG_(arena_malloc) ( VG_AR_TRANSIENT, 
+                                      strlen( description ) + 30 );
+
+   VG_(sprintf) ( outmsg, description, ".sa_family" );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) &sa->sa_family, sizeof (sa_family_t));
+               
+   switch (sa->sa_family) {
+                  
+      case AF_UNIX:
+         VG_(sprintf) ( outmsg, description, ".sun_path" );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, outmsg,
+            (UInt) ((struct sockaddr_un *) sa)->sun_path);
+         break;
+                     
+      case AF_INET:
+         VG_(sprintf) ( outmsg, description, ".sin_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_port,
+            sizeof (((struct sockaddr_in *) sa)->sin_port));
+         VG_(sprintf) ( outmsg, description, ".sin_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_addr,
+            sizeof (struct in_addr));
+         break;
+                           
+      case AF_INET6:
+         VG_(sprintf) ( outmsg, description, ".sin6_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_port,
+            sizeof (((struct sockaddr_in6 *) sa)->sin6_port));
+         VG_(sprintf) ( outmsg, description, ".sin6_flowinfo" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_flowinfo,
+            sizeof (uint32_t));
+         VG_(sprintf) ( outmsg, description, ".sin6_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_addr,
+            sizeof (struct in6_addr));
+#        ifndef GLIBC_2_1
+         VG_(sprintf) ( outmsg, description, ".sin6_scope_id" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_scope_id,
+            sizeof (uint32_t));
+#        endif
+         break;
+               
+      default:
+         VG_(sprintf) ( outmsg, description, "" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) sa, salen );
+         break;
+   }
+   
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+/* Dereference a pointer to a UInt. */
+static UInt deref_UInt ( ThreadState* tst, Addr a, Char* s )
+{
+   UInt* a_p = (UInt*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(UInt) );
+   if (a_p == NULL)
+      return 0;
+   else
+      return *a_p;
+}
+
+/* Dereference a pointer to a pointer. */
+static Addr deref_Addr ( ThreadState* tst, Addr a, Char* s )
+{
+   Addr* a_p = (Addr*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(Addr) );
+   return *a_p;
+}
+
+static 
+void buf_and_len_pre_check( ThreadState* tst, Addr buf_p, Addr buflen_p,
+                            Char* buf_s, Char* buflen_s )
+{
+   if (VG_(track_events).pre_mem_write) {
+      UInt buflen_in = deref_UInt( tst, buflen_p, buflen_s);
+      if (buflen_in > 0) {
+         VG_(track_events).pre_mem_write ( Vg_CoreSysCall,
+                                           tst, buf_s, buf_p, buflen_in );
+      }
+   }
+}
+
+static 
+void buf_and_len_post_check( ThreadState* tst, Int res,
+                             Addr buf_p, Addr buflen_p, Char* s )
+{
+   if (!VG_(is_kerror)(res) && VG_(track_events).post_mem_write) {
+      UInt buflen_out = deref_UInt( tst, buflen_p, s);
+      if (buflen_out > 0 && buf_p != (Addr)NULL) {
+         VG_(track_events).post_mem_write ( buf_p, buflen_out );
+      }
+   }
+}
+
+/* ---------------------------------------------------------------------
+   Data seg end, for brk()
+   ------------------------------------------------------------------ */
+
+/* Records the current end of the data segment so we can make sense of
+   calls to brk(). */
+Addr curr_dataseg_end;
+
+void VG_(init_dataseg_end_for_brk) ( void )
+{
+   curr_dataseg_end = (Addr)VG_(brk)(0);
+   if (curr_dataseg_end == (Addr)(-1))
+      VG_(panic)("can't determine data-seg end for brk()");
+   if (0)
+      VG_(printf)("DS END is %p\n", (void*)curr_dataseg_end);
+}
+
+/* ---------------------------------------------------------------------
+   The Main Entertainment ...
+   ------------------------------------------------------------------ */
+
+void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
+{
+   ThreadState* tst;
+   UInt         syscallno, arg1, arg2, arg3, arg4, arg5;
+   /* Do not make this unsigned! */
+   Int res;
+   void* pre_res = 0;   /* shut gcc up */
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   syscallno        = tst->m_eax;
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+
+   /* Do any pre-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   /* the syscall no is in %eax.  For syscalls with <= 5 args,
+      args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi.
+      For calls with > 5 args, %ebx points to a lump of memory
+      containing the args.
+
+      The result is returned in %eax.  If this value >= 0, the call
+      succeeded, and this is the return value.  If < 0, it failed, and
+      the negation of this value is errno.  To be more specific, 
+      if res is in the range -EMEDIUMTYPE (-124) .. -EPERM (-1)
+      (kernel 2.4.9 sources, include/asm-i386/errno.h)
+      then it indicates an error.  Otherwise it doesn't.
+
+      Dirk Mueller (mueller@kde.org) says that values -4095 .. -1
+      (inclusive?) indicate error returns.  Not sure where the -4095
+      comes from.
+   */
+
+   MAYBE_PRINTF("SYSCALL[%d,%d](%3d): ", 
+                  VG_(getpid)(), tid, syscallno);
+
+   switch (syscallno) {
+
+      case __NR_exit:
+         VG_(panic)("syscall exit() not caught by the scheduler?!");
+         break;
+
+      case __NR_clone:
+         VG_(unimplemented)
+            ("clone(): not supported by Valgrind.\n   "
+             "We do now support programs linked against\n   "
+             "libpthread.so, though.  Re-run with -v and ensure that\n   "
+             "you are picking up Valgrind's implementation of libpthread.so.");
+         break;
+
+#     if defined(__NR_modify_ldt)
+      case __NR_modify_ldt:
+         VG_(nvidia_moan)();
+         VG_(unimplemented)
+            ("modify_ldt(): I (JRS) haven't investigated this yet; sorry.");
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
+
+#     if defined(__NR_vhangup)
+      case __NR_vhangup: /* syscall 111 */
+         /* int vhangup(void); */
+         MAYBE_PRINTF("vhangup()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_iopl)
+      case __NR_iopl: /* syscall 110 */
+         /* int iopl(int level); */
+         MAYBE_PRINTF("iopl ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_getxattr)
+      case __NR_getxattr: /* syscall 229 */
+         /* ssize_t getxattr (const char *path, const char* name,
+                              void* value, size_t size); */
+         MAYBE_PRINTF("getxattr ( %p, %p, %p, %d )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(name)", arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "getxattr(value)", arg3, arg4 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0 
+                                  && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write, arg3, res );
+         }
+         break;
+#     endif
+      
+#     if defined(__NR_quotactl)
+      case __NR_quotactl: /* syscall 131 */
+         /* int quotactl(int cmd, char *special, int uid, caddr_t addr); */
+         MAYBE_PRINTF("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "quotactl(special)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_truncate64)
+      case __NR_truncate64: /* syscall 193 */
+         /* int truncate64(const char *path, off64_t length); */
+         MAYBE_PRINTF("truncate64 ( %p, %lld )\n",
+                        arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate64(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_fdatasync)
+      case __NR_fdatasync: /* syscall 148 */
+         /* int fdatasync(int fd); */
+         MAYBE_PRINTF("fdatasync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_msync) /* syscall 144 */
+      case __NR_msync:
+         /* int msync(const void *start, size_t length, int flags); */
+         MAYBE_PRINTF("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "msync(start)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);  
+         break;
+#     endif
+
+#     if defined(__NR_getpmsg) /* syscall 188 */
+      case __NR_getpmsg: 
+      {
+      /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int *bandp, int *flagsp); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("getpmsg ( %d, %p, %p, %p, %p )\n",
+                      arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(ctrl)", 
+                                (UInt)ctrl->buf, ctrl->maxlen);
+      if (data && data->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(data)", 
+                                 (UInt)data->buf, data->maxlen);
+      if (arg4)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(bandp)", 
+                                (UInt)arg4, sizeof(int));
+      if (arg5)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(flagsp)", 
+                                (UInt)arg5, sizeof(int));
+      KERNEL_DO_SYSCALL(tid,res);
+      if (!VG_(is_kerror)(res) && res == 0 && ctrl && ctrl->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)ctrl->buf, ctrl->len);
+      }
+      if (!VG_(is_kerror)(res) && res == 0 && data && data->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)data->buf, data->len);
+      }
+      }
+      break;
+#     endif
+
+
+#     if defined(__NR_putpmsg) /* syscall 189 */
+      case __NR_putpmsg: 
+      {
+      /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int band, int flags); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("putpmsg ( %d, %p, %p, %d, %d )\n",
+                     arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(ctrl)",
+                                (UInt)ctrl->buf, ctrl->len);
+      if (data && data->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(data)",
+                                (UInt)data->buf, data->len);
+      KERNEL_DO_SYSCALL(tid,res);
+      }
+      break;
+#     endif
+
+      case __NR_getitimer: /* syscall 105 */
+         /* int getitimer(int which, struct itimerval *value); */
+         MAYBE_PRINTF("getitimer ( %d, %p )\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getitimer(timer)", arg2, 
+                           sizeof(struct itimerval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg2, sizeof(struct itimerval));
+         }
+         break;
+
+#     if defined(__NR_syslog)
+      case __NR_syslog: /* syscall 103 */
+         /* int syslog(int type, char *bufp, int len); */
+         MAYBE_PRINTF("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
+         switch(arg1) {
+            case 2: case 3: case 4:
+               SYSCALL_TRACK( pre_mem_write, tst, "syslog(buf)", arg2, arg3);
+	       break;
+            default: 
+               break;
+         }
+         KERNEL_DO_SYSCALL(tid, res);
+         if (!VG_(is_kerror)(res)) {
+            switch (arg1) {
+               case 2: case 3: case 4:
+                  VG_TRACK( post_mem_write, arg2, arg3 );
+                  break;
+               default:
+                  break;
+            }
+         }
+         break;
+#     endif
+
+      case __NR_personality: /* syscall 136 */
+         /* int personality(unsigned long persona); */
+         MAYBE_PRINTF("personality ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chroot: /* syscall 61 */
+         /* int chroot(const char *path); */
+         MAYBE_PRINTF("chroot ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chroot(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_madvise)
+      case __NR_madvise: /* syscall 219 */
+         /* int madvise(void *start, size_t length, int advice ); */
+         MAYBE_PRINTF("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mremap)
+      /* Treating it like an munmap() followed by a mmap() */
+      case __NR_mremap: /* syscall 163 */
+         /* void* mremap(void * old_address, size_t old_size, 
+                         size_t new_size, unsigned long flags); */
+         MAYBE_PRINTF("mremap ( %p, %d, %d, 0x%x )\n", 
+                        arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "mremap(old_address)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mremap_segment( arg1, arg2, (Addr)res, arg3 );
+         }
+         break;         
+#     endif
+
+      case __NR_nice: /* syscall 34 */
+         /* int nice(int inc); */
+         MAYBE_PRINTF("nice ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! */
+
+#     if defined(__NR_setresgid32)
+      case __NR_setresgid32: /* syscall 210 */
+         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
+         MAYBE_PRINTF("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid32)
+      case __NR_setfsuid32: /* syscall 215 */
+         /* int setfsuid(uid_t fsuid); */
+          MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+          KERNEL_DO_SYSCALL(tid,res);
+          break;
+#     endif
+
+#     if defined(__NR__sysctl)
+      case __NR__sysctl:
+      /* int _sysctl(struct __sysctl_args *args); */
+         MAYBE_PRINTF("_sysctl ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "_sysctl(args)", arg1, 
+                            sizeof(struct __sysctl_args) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct __sysctl_args) );
+         break;
+#     endif
+
+#     if defined(__NR_sched_getscheduler)
+      case __NR_sched_getscheduler:
+         /* int sched_getscheduler(pid_t pid); */
+         MAYBE_PRINTF("sched_getscheduler ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_sched_setscheduler)
+      case __NR_sched_setscheduler:
+         /* int sched_setscheduler(pid_t pid, int policy, 
+                const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst,
+                              "sched_setscheduler(struct sched_param *p)", 
+                              arg3, sizeof(struct sched_param));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlock)
+      case __NR_mlock:
+         /* int mlock(const void * addr, size_t len) */
+         MAYBE_PRINTF("mlock ( %p, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlockall)
+      case __NR_mlockall:
+         /* int mlockall(int flags); */
+         MAYBE_PRINTF("mlockall ( %x )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_munlockall)
+      case __NR_munlockall:
+         /* int munlockall(void); */
+         MAYBE_PRINTF("munlockall ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_max)
+      case __NR_sched_get_priority_max:
+         /* int sched_get_priority_max(int policy); */
+         MAYBE_PRINTF("sched_get_priority_max ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_min)
+      case __NR_sched_get_priority_min: /* syscall 160 */
+         /* int sched_get_priority_min(int policy); */
+         MAYBE_PRINTF("sched_get_priority_min ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_setpriority)
+      case __NR_setpriority: /* syscall 97 */
+         /* int setpriority(int which, int who, int prio); */
+         MAYBE_PRINTF("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_getpriority)
+      case __NR_getpriority: /* syscall 96 */
+         /* int getpriority(int which, int who); */
+         MAYBE_PRINTF("getpriority ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsgid)
+      case __NR_setfsgid: /* syscall 139 */
+         /* int setfsgid(gid_t gid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setregid)
+      case __NR_setregid: /* syscall 71 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid)
+      case __NR_setresuid: /* syscall 164 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid)
+      case __NR_setfsuid: /* syscall 138 */
+         /* int setfsuid(uid_t uid); */
+         MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_sendfile)
+      case __NR_sendfile: /* syscall 187 */
+         /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
+                             size_t count) */
+         MAYBE_PRINTF("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sendfile(offset)", arg3, sizeof(off_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg3, sizeof( off_t ) );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_pwrite)
+      case __NR_pwrite: /* syscall 181 */
+         /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
+                            off_t offset); */
+         MAYBE_PRINTF("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read, tst, "pwrite(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! */
+
+      case __NR_sync: /* syscall 36 */
+         /* int sync(); */
+         MAYBE_PRINTF("sync ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+ 
+      case __NR_fstatfs: /* syscall 100 */
+         /* int fstatfs(int fd, struct statfs *buf); */
+         MAYBE_PRINTF("fstatfs ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */
+
+      case __NR_pause: /* syscall 29 */
+         /* int pause(void); */
+         MAYBE_PRINTF("pause ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getsid: /* syscall 147 */
+         /* pid_t getsid(pid_t pid); */
+         MAYBE_PRINTF("getsid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_pread)
+      case __NR_pread: /* syscall 180 */
+         /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */
+         MAYBE_PRINTF("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "pread(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
+                        VG_(getpid)(),
+                        arg1, arg2, arg3, arg4, res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! */
+
+      case __NR_mknod: /* syscall 14 */
+         /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
+         MAYBE_PRINTF("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mknod(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_flock: /* syscall 143 */
+         /* int flock(int fd, int operation); */
+         MAYBE_PRINTF("flock ( %d, %d )\n", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_rt_sigsuspend)
+      /* Viewed with great suspicion by me, but, hey, let's do it
+         anyway ... */
+      case __NR_rt_sigsuspend: /* syscall 179 */
+         /* int sigsuspend(const sigset_t *mask); */
+         MAYBE_PRINTF("sigsuspend ( %p )\n", arg1 );
+         if (arg1 != (Addr)NULL) {
+            /* above NULL test is paranoia */
+            SYSCALL_TRACK( pre_mem_read, tst, "sigsuspend(mask)", arg1, 
+                              sizeof(vki_ksigset_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_init_module: /* syscall 128 */
+         /* int init_module(const char *name, struct module *image); */
+         MAYBE_PRINTF("init_module ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "init_module(name)", arg1 );
+         SYSCALL_TRACK( pre_mem_read, tst, "init_module(image)", arg2, 
+                           VKI_SIZEOF_STRUCT_MODULE );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ioperm: /* syscall 101 */
+         /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
+         MAYBE_PRINTF("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_capget: /* syscall 184 */
+         /* int capget(cap_user_header_t header, cap_user_data_t data); */
+         MAYBE_PRINTF("capget ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "capget(header)", arg1, 
+                                             sizeof(vki_cap_user_header_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "capget(data)", arg2, 
+                                           sizeof( vki_cap_user_data_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof( vki_cap_user_data_t) );
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_execve:
+         /* int execve (const char *filename, 
+                        char *const argv [], 
+                        char *const envp[]); */
+         MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
+                        arg1, arg1, arg2, arg3);
+         /* Resistance is futile.  Nuke all other threads.  POSIX
+            mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         /* Make any binding for LD_PRELOAD disappear, so that child
+            processes don't get traced into. */
+         if (!VG_(clo_trace_children)) {
+            Int i;
+            Char** envp = (Char**)arg3;
+            Char*  ld_preload_str = NULL;
+            Char*  ld_library_path_str = NULL;
+            for (i = 0; envp[i] != NULL; i++) {
+               if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
+                  ld_preload_str = &envp[i][11];
+               if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
+                  ld_library_path_str = &envp[i][16];
+            }
+            VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
+	       ld_preload_str, ld_library_path_str );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Should we still be alive here?  Don't think so. */
+         /* Actually, above comment is wrong.  execve can fail, just
+            like any other syscall -- typically the file to exec does
+            not exist.  Hence: */
+         vg_assert(VG_(is_kerror)(res));
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!!     end     !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_access: /* syscall 33 */
+         /* int access(const char *pathname, int mode); */
+         MAYBE_PRINTF("access ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "access(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_alarm: /* syscall 27 */
+         /* unsigned int alarm(unsigned int seconds); */
+         MAYBE_PRINTF("alarm ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_brk: /* syscall 45 */
+         /* Haven't a clue if this is really right. */
+         /* int brk(void *end_data_segment); */
+         MAYBE_PRINTF("brk ( %p ) --> ",arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("0x%x\n", res);
+
+         if (!VG_(is_kerror)(res)) {
+            if (arg1 == 0) {
+               /* Just asking where the current end is. (???) */
+               curr_dataseg_end = res;
+            } else
+            if (arg1 < curr_dataseg_end) {
+               /* shrinking the data segment. */
+               VG_TRACK( die_mem_brk, (Addr)arg1, 
+                                      curr_dataseg_end-arg1 );
+               curr_dataseg_end = arg1;
+            } else
+            if (arg1 > curr_dataseg_end && res != 0) {
+               /* asked for more memory, and got it */
+               /* 
+               VG_(printf)("BRK: new area %x .. %x\n", 
+                           VG_(curr_dataseg_end, arg1-1 );
+               */
+               VG_TRACK( new_mem_brk, (Addr)curr_dataseg_end, 
+                                         arg1-curr_dataseg_end );
+               curr_dataseg_end = arg1;         
+            }
+         }
+         break;
+
+      case __NR_chdir: /* syscall 12 */
+         /* int chdir(const char *path); */
+         MAYBE_PRINTF("chdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chdir(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chmod: /* syscall 15 */
+         /* int chmod(const char *path, mode_t mode); */
+         MAYBE_PRINTF("chmod ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chmod(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_chown32)
+      case __NR_chown32: /* syscall 212 */
+#     endif
+#     if defined(__NR_lchown32)
+      case __NR_lchown32: /* syscall 198 */
+#     endif
+      case __NR_chown: /* syscall 16 */
+         /* int chown(const char *path, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chown(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_close: /* syscall 6 */
+         /* int close(int fd); */
+         MAYBE_PRINTF("close ( %d )\n",arg1);
+         /* Detect and negate attempts by the client to close Valgrind's
+            logfile fd ... */
+         if (arg1 == VG_(clo_logfile_fd)) {
+            VG_(message)(Vg_UserMsg, 
+              "Warning: client attempted to close "
+               "Valgrind's logfile fd (%d).", 
+               VG_(clo_logfile_fd));
+            VG_(message)(Vg_UserMsg, 
+              "   Use --logfile-fd=<number> to select an "
+              "alternative logfile fd." );
+            /* Pretend the close succeeded, regardless.  (0 == success) */
+            res = 0;
+            SET_EAX(tid, res);
+         } else {
+            KERNEL_DO_SYSCALL(tid,res);
+         }
+         break;
+
+      case __NR_dup: /* syscall 41 */
+         /* int dup(int oldfd); */
+         MAYBE_PRINTF("dup ( %d ) --> ", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n", res);
+         break;
+
+      case __NR_dup2: /* syscall 63 */
+         /* int dup2(int oldfd, int newfd); */
+         MAYBE_PRINTF("dup2 ( %d, %d ) ...\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
+                        VG_(getpid)(), 
+                        arg1, arg2, res);
+         break;
+
+      case __NR_fcntl: /* syscall 55 */
+         /* int fcntl(int fd, int cmd, int arg); */
+         MAYBE_PRINTF("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchdir: /* syscall 133 */
+         /* int fchdir(int fd); */
+         MAYBE_PRINTF("fchdir ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fchown32)
+      case __NR_fchown32: /* syscall 207 */
+#     endif
+      case __NR_fchown: /* syscall 95 */
+         /* int fchown(int filedes, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchmod: /* syscall 94 */
+         /* int fchmod(int fildes, mode_t mode); */
+         MAYBE_PRINTF("fchmod ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fcntl64)
+      case __NR_fcntl64: /* syscall 221 */
+         /* I don't know what the prototype for this is supposed to be. */
+         /* ??? int fcntl(int fd, int cmd); */
+         MAYBE_PRINTF("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_fstat: /* syscall 108 */
+         /* int fstat(int filedes, struct stat *buf); */
+         MAYBE_PRINTF("fstat ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_vfork: /* syscall 190 */
+         /* pid_t vfork(void); */
+         MAYBE_PRINTF("vfork ( ) ... becomes ... ");
+         /* KLUDGE: we prefer to do a fork rather than vfork. 
+            vfork gives a SIGSEGV, and the stated semantics looks
+            pretty much impossible for us. */
+         tst->m_eax = __NR_fork;
+         /* fall through ... */
+      case __NR_fork: /* syscall 2 */
+         /* pid_t fork(void); */
+         MAYBE_PRINTF("fork ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         if (res == 0) {
+            /* I am the child.  Nuke all other threads which I might
+               have inherited from my parent.  POSIX mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         }
+         break;
+
+      case __NR_fsync: /* syscall 118 */
+         /* int fsync(int fd); */
+         MAYBE_PRINTF("fsync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ftruncate: /* syscall 93 */
+         /* int ftruncate(int fd, size_t length); */
+         MAYBE_PRINTF("ftruncate ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_ftruncate64)
+      case __NR_ftruncate64: /* syscall 194 */
+         /* int ftruncate64(int fd, off64_t length); */
+         MAYBE_PRINTF("ftruncate64 ( %d, %lld )\n", 
+                        arg1,arg2|((long long) arg3 << 32));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getdents: /* syscall 141 */
+         /* int getdents(unsigned int fd, struct dirent *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+#     if defined(__NR_getdents64)
+      case __NR_getdents64: /* syscall 220 */
+         /* int getdents(unsigned int fd, struct dirent64 *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents64(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+#     endif
+
+#     if defined(__NR_getgroups32)
+      case __NR_getgroups32: /* syscall 205 */
+#     endif
+      case __NR_getgroups: /* syscall 80 */
+         /* int getgroups(int size, gid_t list[]); */
+         MAYBE_PRINTF("getgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "getgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res * sizeof(gid_t) );
+         break;
+
+      case __NR_getcwd: /* syscall 183 */
+         /* char *getcwd(char *buf, size_t size); */
+         MAYBE_PRINTF("getcwd ( %p, %d )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getcwd(buf)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg1, arg2 );
+         /* Not really right -- really we should have the asciiz
+            string starting at arg1 readable, or up to arg2 bytes,
+            whichever finishes first. */
+         break;
+
+      case __NR_geteuid: /* syscall 49 */
+         /* uid_t geteuid(void); */
+         MAYBE_PRINTF("geteuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_geteuid32)
+      case __NR_geteuid32: /* syscall 201 */
+         /* ?? uid_t geteuid32(void); */
+         MAYBE_PRINTF("geteuid32(?) ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getegid: /* syscall 50 */
+         /* gid_t getegid(void); */
+         MAYBE_PRINTF("getegid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getegid32)
+      case __NR_getegid32: /* syscall 202 */
+         /* gid_t getegid32(void); */
+         MAYBE_PRINTF("getegid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getgid: /* syscall 47 */
+         /* gid_t getgid(void); */
+         MAYBE_PRINTF("getgid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getgid32)
+      case __NR_getgid32: /* syscall 200 */
+         /* gid_t getgid32(void); */
+         MAYBE_PRINTF("getgid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getpid: /* syscall 20 */
+         /* pid_t getpid(void); */
+         MAYBE_PRINTF("getpid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgid: /* syscall 132 */
+         /* pid_t getpgid(pid_t pid); */
+         MAYBE_PRINTF("getpgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgrp: /* syscall 65 */
+         /* pid_t getpprp(void); */
+         MAYBE_PRINTF("getpgrp ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getppid: /* syscall 64 */
+         /* pid_t getppid(void); */
+         MAYBE_PRINTF("getppid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getresgid: /* syscall 171 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresgid32)
+      case __NR_getresgid32: /* syscall 211 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+#     endif
+
+      case __NR_getresuid: /* syscall 165 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresuid32)
+      case __NR_getresuid32: /* syscall 209 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+#     endif
+
+#     if defined(__NR_ugetrlimit)
+      case __NR_ugetrlimit: /* syscall 191 */
+#     endif
+      case __NR_getrlimit: /* syscall 76 */
+         /* int getrlimit (int resource, struct rlimit *rlim); */
+         MAYBE_PRINTF("getrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrlimit(rlim)", arg2, 
+                           sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct rlimit) );
+         break;
+
+      case __NR_getrusage: /* syscall 77 */
+         /* int getrusage (int who, struct rusage *usage); */
+         MAYBE_PRINTF("getrusage ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrusage(usage)", arg2, 
+                           sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write,arg2, sizeof(struct rusage) );
+         break;
+
+      case __NR_gettimeofday: /* syscall 78 */
+         /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
+         MAYBE_PRINTF("gettimeofday ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tv)", arg1, 
+                           sizeof(struct timeval) );
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tz)", arg2, 
+                              sizeof(struct timezone) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct timeval) );
+            if (arg2 != 0)
+               VG_TRACK( post_mem_write, arg2, sizeof(struct timezone) );
+         }
+         break;
+
+      case __NR_getuid: /* syscall 24 */
+         /* uid_t getuid(void); */
+         MAYBE_PRINTF("getuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getuid32)
+      case __NR_getuid32: /* syscall 199 */
+         /* ???uid_t getuid32(void); */
+         MAYBE_PRINTF("getuid32 ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_ipc: /* syscall 117 */
+         /* int ipc ( unsigned int call, int first, int second, 
+                      int third, void *ptr, long fifth); */
+         {
+         UInt arg6 = tst->m_ebp;
+
+         MAYBE_PRINTF("ipc ( %d, %d, %d, %d, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5,arg6);
+         switch (arg1 /* call */) {
+            case 1: /* IPCOP_semop */
+               SYSCALL_TRACK( pre_mem_read, tst, "semop(sops)", arg5, 
+                                  arg3 * sizeof(struct sembuf) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 2: /* IPCOP_semget */
+            case 3: /* IPCOP_semctl */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 11: /* IPCOP_msgsnd */
+               {
+                  struct msgbuf *msgp = (struct msgbuf *)arg5;
+                  Int msgsz = arg3;
+
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+            case 12: /* IPCOP_msgrcv */
+               {
+                  struct msgbuf *msgp;
+                  Int msgsz = arg3;
+ 
+                  msgp = (struct msgbuf *)deref_Addr( tst,
+                            (Addr) (&((struct ipc_kludge *)arg5)->msgp),
+                            "msgrcv(msgp)" );
+
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)(res) && res > 0 ) {
+                     VG_TRACK( post_mem_write, (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                     VG_TRACK( post_mem_write, (UInt)msgp->mtext, res );
+                  }
+                  break;
+               }
+            case 13: /* IPCOP_msgget */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 14: /* IPCOP_msgctl */
+               {
+                  switch (arg3 /* cmd */) {
+                     case IPC_STAT:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid_ds) );
+                        }
+                        break;
+                     case IPC_SET:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    if defined(IPC_64)
+                     case IPC_STAT|IPC_64:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid64_ds) );
+                        }
+                        break;
+#                    endif
+#                    if defined(IPC_64)
+                     case IPC_SET|IPC_64:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    endif
+                     default:
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+                  }
+                  break;
+               }
+            case 21: /* IPCOP_shmat */
+               {
+                  Int shmid = arg2;
+                  /*Int shmflag = arg3;*/
+                  Addr addr;
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( VG_(is_kerror) ( res ) )
+                     break;
+                  
+                  /* force readability. before the syscall it is
+                   * indeed uninitialized, as can be seen in
+                   * glibc/sysdeps/unix/sysv/linux/shmat.c */
+                  VG_TRACK( post_mem_write, arg4, sizeof( ULong ) );
+
+                  addr = deref_Addr ( tst, arg4, "shmat(addr)" );
+                  if ( addr > 0 ) { 
+                     UInt segmentSize = get_shm_size ( shmid );
+                     if ( segmentSize > 0 ) {
+                        /* we don't distinguish whether it's read-only or
+                         * read-write -- it doesn't matter really. */
+                        VG_TRACK( post_mem_write, addr, segmentSize );
+                     }
+                  }
+                  break;
+               }
+            case 22: /* IPCOP_shmdt */
+                  KERNEL_DO_SYSCALL(tid,res);
+                  /* ### FIXME: this should call make_noaccess on the
+                   * area passed to shmdt. But there's no way to
+                   * figure out the size of the shared memory segment
+                   * just from the address...  Maybe we want to keep a
+                   * copy of the exiting mappings inside valgrind? */
+                  break;
+            case 23: /* IPCOP_shmget */
+                KERNEL_DO_SYSCALL(tid,res);
+                break;
+            case 24: /* IPCOP_shmctl */
+	      /* Subject: shmctl: The True Story
+                    Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
+                    From: Reuben Thomas <rrt@mupsych.org>
+                      To: Julian Seward <jseward@acm.org>
+
+                 1. As you suggested, the syscall subop is in arg1.
+
+                 2. There are a couple more twists, so the arg order
+                    is actually:
+
+                 arg1 syscall subop
+                 arg2 file desc
+                 arg3 shm operation code (can have IPC_64 set)
+                 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
+                        is defined?
+                 arg5 pointer to buffer
+
+                 3. With this in mind, I've amended the case as below:
+	      */
+               {
+                  UInt cmd = arg3;
+                  Bool out_arg = False;
+                  if ( arg5 ) {
+#                    if defined(IPC_64)
+                     cmd = cmd & (~IPC_64);
+#                    endif
+                     out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
+                     if ( out_arg )
+                        SYSCALL_TRACK( pre_mem_write, tst, 
+                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                     else
+                        SYSCALL_TRACK( pre_mem_read, tst, 
+                           "shmctl(SHM_XXXX,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                  }
+                  KERNEL_DO_SYSCALL(tid,res);
+                  if ( arg5 && !VG_(is_kerror)(res) && res == 0 && out_arg )
+                          VG_TRACK( post_mem_write, arg5, sizeof(struct shmid_ds) );
+               }
+               break;
+            default:
+               VG_(message)(Vg_DebugMsg,
+                            "FATAL: unhandled syscall(ipc) %d",
+                            arg1 );
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         }
+         break;
+
+      case __NR_ioctl: /* syscall 54 */
+         /* int ioctl(int d, int request, ...)
+            [The  "third"  argument  is traditionally char *argp, 
+             and will be so named for this discussion.]
+         */
+         /*
+         VG_(message)(
+            Vg_DebugMsg, 
+            "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
+            arg2,arg1,arg3);
+         */
+         MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
+         switch (arg2 /* request */) {
+            case TCSETS:
+            case TCSETSW:
+            case TCSETSF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{S,SW,SF})", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               break; 
+            case TCGETS:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETS)", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIOS );
+               break;
+            case TCSETA:
+            case TCSETAW:
+            case TCSETAF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{A,AW,AF})", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TCGETA:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETA)", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIO );
+               break;
+            case TCSBRK:
+            case TCXONC:
+            case TCSBRKP:
+            case TCFLSH:
+               /* These just take an int by value */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGWINSZ:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(struct winsize) );
+               break;
+            case TIOCSWINSZ:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGPGRP:
+               /* Get process group ID for foreground processing group. */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
+               break;
+            case TIOCSPGRP:
+               /* Set foreground process group ID.  The kernel READS a
+                  pid_t from arg3 and writes nothing back, so this must
+                  be pre_mem_read (not write) and needs no post_mem_write. */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res); 
+               break;
+            case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(TIOCGPTN)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write, arg3, sizeof(int));
+               break;
+            case TIOCSCTTY:
+               /* Just takes an int value.  */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCSPTLCK: /* Lock/unlock Pty */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPTLCK)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONBIO:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIONBIO)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIOASYNC:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIOASYNC)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONREAD:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(FIONREAD)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(int) );
+               break;
+
+            /* If you get compilation problems here, change the #if
+               1 to #if 0 and get rid of <scsi/sg.h> in
+               vg_unsafe.h. */
+#       if 1
+            case SG_SET_COMMAND_Q:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_COMMAND_Q)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#           if defined(SG_IO)
+            case SG_IO:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_IO)", arg3, 
+                                 sizeof(struct sg_io_hdr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_io_hdr));
+               break;
+#           endif /* SG_IO */
+            case SG_GET_SCSI_ID:
+               /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_SCSI_ID)", arg3, 
+                                 sizeof(struct sg_scsi_id) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_scsi_id));
+               break;
+            case SG_SET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_RESERVED_SIZE)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_SET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_GET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_VERSION_NUM:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_GET_VERSION_NUM)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#       endif
+
+            case IIOCGETCPS:
+               /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
+                * when KERNEL was. I never saw a larger value than 64 though */
+#              ifndef ISDN_MAX_CHANNELS
+#              define ISDN_MAX_CHANNELS 64
+#              endif
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCGETCPS)", arg3,
+                                 ISDN_MAX_CHANNELS 
+                                 * 2 * sizeof(unsigned long) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, ISDN_MAX_CHANNELS 
+                                        * 2 * sizeof(unsigned long) );
+               break;
+            case IIOCNETGPN:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(IIOCNETGPN)",
+                                 (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
+                                 sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCNETGPN)", arg3,
+                                 sizeof(isdn_net_ioctl_phone) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(isdn_net_ioctl_phone) );
+               break;
+
+            /* These all use struct ifreq AFAIK */
+            case SIOCGIFINDEX:
+            case SIOCGIFFLAGS:        /* get flags                    */
+            case SIOCGIFHWADDR:       /* Get hardware address         */
+            case SIOCGIFMTU:          /* get MTU size                 */
+            case SIOCGIFADDR:         /* get PA address               */
+            case SIOCGIFNETMASK:      /* get network PA mask          */
+            case SIOCGIFMETRIC:       /* get metric                   */
+            case SIOCGIFMAP:          /* Get device parameters        */
+            case SIOCGIFTXQLEN:       /* Get the tx queue length      */
+            case SIOCGIFDSTADDR:      /* get remote PA address        */
+            case SIOCGIFBRDADDR:      /* get broadcast PA address     */
+            case SIOCGIFNAME:         /* get iface name               */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFINDEX)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifreq));
+               break;
+            case SIOCGIFCONF:         /* get iface list               */
+               /* WAS:
+               SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
+               */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               if ( arg3 ) {
+                  // TODO len must be readable and writable
+                  // buf pointer only needs to be readable
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFCONF).ifc_buf",
+                                   (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0 && arg3 ) {
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  if (ifc->ifc_buf != NULL)
+                     VG_TRACK( post_mem_write, (Addr)(ifc->ifc_buf), 
+                                     (UInt)(ifc->ifc_len) );
+               }
+               break;
+            case SIOCGSTAMP:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGSTAMP)", arg3, 
+                                sizeof(struct timeval));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct timeval));
+               break;
+            case SIOCGRARP:           /* get RARP table entry         */
+            case SIOCGARP:            /* get ARP table entry          */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGARP)", arg3, 
+                                sizeof(struct arpreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct arpreq));
+               break;
+                    
+            case SIOCSIFFLAGS:        /* set flags                    */
+            case SIOCSIFMAP:          /* Set device parameters        */
+            case SIOCSIFTXQLEN:       /* Set the tx queue length      */
+            case SIOCSIFDSTADDR:      /* set remote PA address        */
+            case SIOCSIFBRDADDR:      /* set broadcast PA address     */
+            case SIOCSIFNETMASK:      /* set network PA mask          */
+            case SIOCSIFMETRIC:       /* set metric                   */
+            case SIOCSIFADDR:         /* set PA address               */
+            case SIOCSIFMTU:          /* set MTU size                 */
+            case SIOCSIFHWADDR:       /* set hardware address         */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCSIFFLAGS)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            /* Routing table calls.  */
+            case SIOCADDRT:           /* add routing table entry      */
+            case SIOCDELRT:           /* delete routing table entry   */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCADDRT/DELRT)", arg3, 
+                                sizeof(struct rtentry));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* RARP cache control calls. */
+            case SIOCDRARP:           /* delete RARP table entry      */
+            case SIOCSRARP:           /* set RARP table entry         */
+            /* ARP cache control calls. */
+            case SIOCSARP:            /* set ARP table entry          */
+            case SIOCDARP:            /* delete ARP table entry       */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCSIFFLAGS)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SIOCSPGRP:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SIOCSPGRP)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* linux/soundcard interface (OSS) */
+            case SNDCTL_SEQ_GETOUTCOUNT:
+            case SNDCTL_SEQ_GETINCOUNT:
+            case SNDCTL_SEQ_PERCMODE:
+            case SNDCTL_SEQ_TESTMIDI:
+            case SNDCTL_SEQ_RESETSAMPLES:
+            case SNDCTL_SEQ_NRSYNTHS:
+            case SNDCTL_SEQ_NRMIDIS:
+            case SNDCTL_SEQ_GETTIME:
+            case SNDCTL_DSP_GETFMTS:
+            case SNDCTL_DSP_GETTRIGGER:
+            case SNDCTL_DSP_GETODELAY:
+#           if defined(SNDCTL_DSP_GETSPDIF)
+            case SNDCTL_DSP_GETSPDIF:
+#           endif
+            case SNDCTL_DSP_GETCAPS:
+            case SOUND_PCM_READ_RATE:
+            case SOUND_PCM_READ_CHANNELS:
+            case SOUND_PCM_READ_BITS:
+            case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
+            case SOUND_PCM_READ_FILTER:
+               SYSCALL_TRACK( pre_mem_write,tst,"ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
+                                arg3,
+                                sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SNDCTL_SEQ_CTRLRATE:
+            case SNDCTL_DSP_SPEED:
+            case SNDCTL_DSP_STEREO:
+            case SNDCTL_DSP_GETBLKSIZE: 
+            case SNDCTL_DSP_CHANNELS:
+            case SOUND_PCM_WRITE_FILTER:
+            case SNDCTL_DSP_SUBDIVIDE:
+            case SNDCTL_DSP_SETFRAGMENT:
+#           if defined(SNDCTL_DSP_GETCHANNELMASK)
+            case SNDCTL_DSP_GETCHANNELMASK:
+#           endif
+#           if defined(SNDCTL_DSP_BIND_CHANNEL)
+            case SNDCTL_DSP_BIND_CHANNEL:
+#           endif
+            case SNDCTL_TMR_TIMEBASE:
+            case SNDCTL_TMR_TEMPO:
+            case SNDCTL_TMR_SOURCE:
+            case SNDCTL_MIDI_PRETIME:
+            case SNDCTL_MIDI_MPUMODE:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SNDCTL_DSP_GETOSPACE:
+            case SNDCTL_DSP_GETISPACE:
+               SYSCALL_TRACK( pre_mem_write,tst, 
+                                "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                "(SIOR, audio_buf_info))", arg3,
+                                sizeof(audio_buf_info));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(audio_buf_info));
+               break;
+            case SNDCTL_DSP_SETTRIGGER:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* Real Time Clock (/dev/rtc) ioctls */
+#           ifndef GLIBC_2_1
+            case RTC_UIE_ON:
+            case RTC_UIE_OFF:
+            case RTC_AIE_ON:
+            case RTC_AIE_OFF:
+            case RTC_PIE_ON:
+            case RTC_PIE_OFF:
+            case RTC_IRQP_SET:
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_RD_TIME:
+            case RTC_ALM_READ:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_RD_TIME/ALM_READ)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* fix: must CALL is_kerror; bare fn ptr is always true */
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct rtc_time));
+            case RTC_ALM_SET:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(RTC_ALM_SET)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_IRQP_READ:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_IRQP_READ)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* fix: must CALL is_kerror; bare fn ptr is always true */
+               if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+#           endif /* GLIBC_2_1 */
+
+#           ifdef BLKGETSIZE
+            case BLKGETSIZE:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(BLKGETSIZE)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+               break;
+#           endif /* BLKGETSIZE */
+
+            /* CD ROM stuff (??)  */
+            case CDROMSUBCHNL:
+                SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMSUBCHNL (cdsc_format, char))",
+                   (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
+                   sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMSUBCHNL)", arg3, 
+                   sizeof(struct cdrom_subchnl));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_subchnl));
+                break;
+            case CDROMREADTOCHDR:
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCHDR)", arg3, 
+                   sizeof(struct cdrom_tochdr));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
+                break;
+            case CDROMREADTOCENTRY:
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
+                 SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCENTRY)", arg3, 
+                    sizeof(struct cdrom_tocentry));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 if (!VG_(is_kerror)(res) && res == 0)
+                    /* fix: kernel fills a cdrom_tocentry, not a tochdr */
+                    VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tocentry));
+            case CDROMPLAYMSF:
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMPLAYMSF)", arg3, 
+                    sizeof(struct cdrom_msf));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 break;
+            /* We don't have any specific information on it, so
+               try to do something reasonable based on direction and
+               size bits.  The encoding scheme is described in
+               /usr/include/asm/ioctl.h.  
+
+               According to Simon Hausmann, _IOC_READ means the kernel
+               writes a value to the ioctl value passed from the user
+               space and the other way around with _IOC_WRITE. */
+            default: {
+               UInt dir  = _IOC_DIR(arg2);
+               UInt size = _IOC_SIZE(arg2);
+               if (/* size == 0 || */ dir == _IOC_NONE) {
+                  VG_(message)(Vg_UserMsg, 
+                     "Warning: noted but unhandled ioctl 0x%x"
+                     " with no size/direction hints",
+                     arg2); 
+                  VG_(message)(Vg_UserMsg, 
+                     "   This could cause spurious value errors"
+                     " to appear.");
+                  VG_(message)(Vg_UserMsg, 
+                     "   See README_MISSING_SYSCALL_OR_IOCTL for guidance on"
+                     " writing a proper wrapper." );
+               } else {
+                  if ((dir & _IOC_WRITE) && size > 0)
+                     SYSCALL_TRACK( pre_mem_read,tst, "ioctl(generic)", arg3, size);
+                  if ((dir & _IOC_READ) && size > 0)
+                     SYSCALL_TRACK( pre_mem_write,tst, "ioctl(generic)", arg3, size);
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (size > 0 && (dir & _IOC_READ)
+                   && !VG_(is_kerror)(res) && res == 0
+                   && arg3 != (Addr)NULL)
+                  VG_TRACK( post_mem_write,arg3, size);
+               break;
+            }
+         }
+         break;
+
+      case __NR_kill: /* syscall 37 */
+         /* int kill(pid_t pid, int sig); */
+         MAYBE_PRINTF("kill ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_link: /* syscall 9 */
+         /* int link(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("link ( %p, %p)\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(oldpath)", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(newpath)", arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_lseek: /* syscall 19 */
+         /* off_t lseek(int fildes, off_t offset, int whence); */
+         MAYBE_PRINTF("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR__llseek: /* syscall 140 */
+         /* int _llseek(unsigned int fd, unsigned long offset_high,       
+                        unsigned long  offset_low, 
+                        loff_t * result, unsigned int whence); */
+         MAYBE_PRINTF("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         SYSCALL_TRACK( pre_mem_write, tst, "llseek(result)", arg4, sizeof(loff_t));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg4, sizeof(loff_t) );
+         break;
+
+      case __NR_lstat: /* syscall 107 */
+         /* int lstat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         }
+         break;
+
+#     if defined(__NR_lstat64)
+      case __NR_lstat64: /* syscall 196 */
+         /* int lstat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         }
+         break;
+#     endif
+
+      case __NR_mkdir: /* syscall 39 */
+         /* int mkdir(const char *pathname, mode_t mode); */
+         MAYBE_PRINTF("mkdir ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mkdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_mmap2)
+      case __NR_mmap2: /* syscall 192 */
+         /* My impression is that this is exactly like __NR_mmap 
+            except that all 6 args are passed in regs, rather than in 
+            a memory-block. */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         if (VG_(clo_trace_syscalls)) {
+            UInt arg6 = tst->m_ebp;
+            VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+#     endif
+
+      case __NR_mmap: /* syscall 90 */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         SYSCALL_TRACK( pre_mem_read, tst, "mmap(args)", arg1, 6*sizeof(UInt) );
+         {
+            UInt* arg_block = (UInt*)arg1;
+            UInt arg6;
+            arg1 = arg_block[0];
+            arg2 = arg_block[1];
+            arg3 = arg_block[2];
+            arg4 = arg_block[3];
+            arg5 = arg_block[4];
+            arg6 = arg_block[5];
+            MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+
+      case __NR_mprotect: /* syscall 125 */
+         /* int mprotect(const void *addr, size_t len, int prot); */
+         /* should addr .. addr+len-1 be checked before the call? */
+         MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mprotect_segment( arg1, arg2, arg3 );
+         }
+         break;
+
+      case __NR_munmap: /* syscall 91 */
+         /* int munmap(void *start, size_t length); */
+         /* should start .. start+length-1 be checked before the call? */
+         MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            munmap_segment( arg1, arg2 );
+         }
+         break;
+
+      case __NR_nanosleep: /* syscall 162 */
+         /* int nanosleep(const struct timespec *req, struct timespec *rem); */
+         MAYBE_PRINTF("nanosleep ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "nanosleep(req)", arg1, 
+                                              sizeof(struct timespec) );
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "nanosleep(rem)", arg2, 
+                               sizeof(struct timespec) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Somewhat bogus ... is only written by the kernel if
+            res == -1 && errno == EINTR. */
+         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct timespec) );
+         break;
+
+      case __NR__newselect: /* syscall 142 */
+         /* int select(int n,  
+                       fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
+                       struct timeval *timeout);
+         */
+         MAYBE_PRINTF("newselect ( %d, %p, %p, %p, %p )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         /* Each fd_set is checked for arg1/8 bytes: one bit per fd up to
+            n.  All pointer args are optional (NULL allowed). */
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(readfds)",   
+                              arg2, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg3 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(writefds)",  
+                              arg3, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg4 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(exceptfds)", 
+                              arg4, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg5 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(timeout)", arg5, 
+                              sizeof(struct timeval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* NOTE(review): the kernel rewrites the fd_sets (and timeout)
+            but no post_mem_write is issued here -- confirm intended. */
+         break;
+         
+      case __NR_open: /* syscall 5 */
+         /* int open(const char *pathname, int flags); */
+         /* arg1 is printed twice: once as a pointer, once via %s as the
+            pathname string. */
+         MAYBE_PRINTF("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "open(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n",res);
+         break;
+
+      case __NR_pipe: /* syscall 42 */
+         /* int pipe(int filedes[2]); */
+         MAYBE_PRINTF("pipe ( %p ) ...\n", arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "pipe(filedes)", arg1, 2*sizeof(int) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* On success the kernel filled in both fds. */
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, 2*sizeof(int) );
+         if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res))
+            VG_(printf)("SYSCALL[%d]       pipe --> (rd %d, wr %d)\n", 
+                        VG_(getpid)(), 
+                        ((UInt*)arg1)[0], ((UInt*)arg1)[1] );
+         break;
+
+      case __NR_poll: /* syscall 168 */
+         /* struct pollfd {
+               int fd;           -- file descriptor
+               short events;     -- requested events
+               short revents;    -- returned events
+            };
+           int poll(struct pollfd *ufds, unsigned int nfds, 
+                                         int timeout) 
+         */
+         MAYBE_PRINTF("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
+         /* In fact some parts of this struct should be readable too.
+            This should be fixed properly. */
+         SYSCALL_TRACK( pre_mem_write, tst, "poll(ufds)", 
+                           arg1, arg2 * sizeof(struct pollfd) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            Int i;
+            struct pollfd * arr = (struct pollfd *)arg1;
+            /* Only the revents field of each entry is kernel-written. */
+            for (i = 0; i < arg2; i++)
+               VG_TRACK( post_mem_write, (Addr)(&arr[i].revents), sizeof(Short) );
+         }
+         break;
+ 
+      case __NR_readlink: /* syscall 85 */
+         /* int readlink(const char *path, char *buf, size_t bufsiz); */
+         MAYBE_PRINTF("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "readlink(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "readlink(buf)", arg2,arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* res is the number of bytes actually placed in buf. */
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+
+      case __NR_readv: { /* syscall 145 */
+         /* int readv(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "readv(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_write, tst, "readv(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            /* res holds the number of bytes read. */
+            /* Distribute res across the iovecs in order, marking only the
+               prefix of each that was actually filled. */
+            for (i = 0; i < arg3; i++) {
+               Int nReadThisBuf = vec[i].iov_len;
+               if (nReadThisBuf > res) nReadThisBuf = res;
+               VG_TRACK( post_mem_write, (UInt)vec[i].iov_base, nReadThisBuf );
+               res -= nReadThisBuf;
+               if (res < 0) VG_(panic)("readv: res < 0");
+            }
+         }
+         break;
+      }
+
+      case __NR_rename: /* syscall 38 */
+         /* int rename(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("rename ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_rmdir: /* syscall 40 */
+         /* int rmdir(const char *pathname); */
+         MAYBE_PRINTF("rmdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rmdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_sched_setparam: /* syscall 154 */
+         /* int sched_setparam(pid_t pid, const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "sched_setparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* NOTE(review): *p is const and is only read by the kernel for
+            setparam; this post_mem_write mirrors sched_getparam below --
+            confirm it is intentional. */
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_getparam: /* syscall 155 */
+         /* int sched_getparam(pid_t pid, struct sched_param *p); */
+         MAYBE_PRINTF("sched_getparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sched_getparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_yield: /* syscall 158 */
+         /* int sched_yield(void); */
+         MAYBE_PRINTF("sched_yield ()\n" );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_select: /* syscall 82 */
+         /* struct sel_arg_struct {
+              unsigned long n;
+              fd_set *inp, *outp, *exp;
+              struct timeval *tvp;
+            };
+            int old_select(struct sel_arg_struct *arg);
+         */
+         /* Old-style select: the five real arguments are packed in a
+            struct pointed to by arg1; unpack them into arg1..arg5. */
+         SYSCALL_TRACK( pre_mem_read, tst, "select(args)", arg1, 5*sizeof(UInt) );
+         {
+            UInt* arg_struct = (UInt*)arg1;
+            arg1 = arg_struct[0];
+            arg2 = arg_struct[1];
+            arg3 = arg_struct[2];
+            arg4 = arg_struct[3];
+            arg5 = arg_struct[4];
+
+            MAYBE_PRINTF("select ( %d, %p, %p, %p, %p )\n", 
+                         arg1,arg2,arg3,arg4,arg5);
+            if (arg2 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(readfds)", arg2, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg3 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(writefds)", arg3, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg4 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(exceptfds)", arg4, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg5 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(timeout)", arg5, 
+                                          sizeof(struct timeval) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setitimer: /* syscall 104 */
+         /* setitimer(int which, const struct itimerval *value,
+                                 struct itimerval *ovalue); */
+         MAYBE_PRINTF("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
+         /* value is read, ovalue written; both may be NULL. */
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_read,tst, "setitimer(value)", 
+                             arg2, sizeof(struct itimerval) );
+         if (arg3 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write,tst, "setitimer(ovalue)", 
+                             arg3, sizeof(struct itimerval));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg3, sizeof(struct itimerval));
+         }
+         break;
+
+      /* The set*id family below take only scalar arguments, so apart
+         from setgroups/setrlimit no memory tracking is required. */
+#     if defined(__NR_setfsgid32)
+      case __NR_setfsgid32: /* syscall 216 */
+         /* int setfsgid(uid_t fsgid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setgid32)
+      case __NR_setgid32: /* syscall 214 */
+#     endif
+      case __NR_setgid: /* syscall 46 */
+         /* int setgid(gid_t gid); */
+         MAYBE_PRINTF("setgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setsid: /* syscall 66 */
+         /* pid_t setsid(void); */
+         MAYBE_PRINTF("setsid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setgroups32)
+      case __NR_setgroups32: /* syscall 206 */
+#     endif
+      case __NR_setgroups: /* syscall 81 */
+         /* int setgroups(size_t size, const gid_t *list); */
+         MAYBE_PRINTF("setgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "setgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setpgid: /* syscall 57 */
+         /* int setpgid(pid_t pid, pid_t pgid); */
+         MAYBE_PRINTF("setpgid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setregid32)
+      case __NR_setregid32: /* syscall 204 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid32(?) ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid32)
+      case __NR_setresuid32: /* syscall 208 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setreuid32)
+      case __NR_setreuid32: /* syscall 203 */
+#     endif
+      case __NR_setreuid: /* syscall 70 */
+         /* int setreuid(uid_t ruid, uid_t euid); */
+         MAYBE_PRINTF("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setrlimit: /* syscall 75 */
+         /* int setrlimit (int resource, const struct rlimit *rlim); */
+         MAYBE_PRINTF("setrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "setrlimit(rlim)", arg2, sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setuid32)
+      case __NR_setuid32: /* syscall 213 */
+#     endif
+      case __NR_setuid: /* syscall 23 */
+         /* int setuid(uid_t uid); */
+         MAYBE_PRINTF("setuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_socketcall: /* syscall 102 */
+         /* int socketcall(int call, unsigned long *args); */
+         /* Multiplexed socket entry point: arg1 selects the operation,
+            arg2 points at the packed argument vector for it. */
+         MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
+         switch (arg1 /* request */) {
+
+            case SYS_SOCKETPAIR:
+               /* int socketpair(int d, int type, int protocol, int sv[2]); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socketpair(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.socketpair(sv)", 
+                                 ((UInt*)arg2)[3], 2*sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[3], 2*sizeof(int) );
+               break;
+
+            case SYS_SOCKET:
+               /* int socket(int domain, int type, int protocol); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socket(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_BIND:
+               /* int bind(int sockfd, struct sockaddr *my_addr, 
+                           int addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.bind(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               pre_mem_read_sockaddr( tst, "socketcall.bind(my_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+               
+            case SYS_LISTEN:
+               /* int listen(int s, int backlog); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.listen(args)", 
+                                 arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_ACCEPT: {
+               /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.accept(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               {
+               /* addr/addrlen form the usual value-result pair; the
+                  buf_and_len helpers handle pre/post checking for it. */
+               Addr addr_p     = ((UInt*)arg2)[1];
+               Addr addrlen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, addr_p, addrlen_p,
+                                       "socketcall.accept(addr)",
+                                       "socketcall.accept(addrlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, addr_p, addrlen_p,
+                                        "socketcall.accept(addrlen_out)" );
+               }
+               break;
+            }
+
+            case SYS_SENDTO:
+               /* int sendto(int s, const void *msg, int len, 
+                             unsigned int flags, 
+                             const struct sockaddr *to, int tolen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(args)", arg2, 
+                                 6*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               pre_mem_read_sockaddr( tst, "socketcall.sendto(to.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SEND:
+               /* int send(int s, const void *msg, size_t len, int flags); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(args)", arg2,
+                                 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_RECVFROM:
+               /* int recvfrom(int s, void *buf, int len, unsigned int flags,
+                               struct sockaddr *from, int *fromlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recvfrom(args)", 
+                                 arg2, 6*sizeof(Addr) );
+               {
+               Addr buf_p      = ((UInt*)arg2)[1];
+               Int  len        = ((UInt*)arg2)[2];
+               Addr from_p     = ((UInt*)arg2)[4];
+               Addr fromlen_p  = ((UInt*)arg2)[5];
+
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recvfrom(buf)", 
+                                             buf_p, len );
+               buf_and_len_pre_check ( tst, from_p, fromlen_p, 
+                                       "socketcall.recvfrom(from)",
+                                       "socketcall.recvfrom(fromlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, from_p, fromlen_p,
+                                        "socketcall.recvfrom(fromlen_out)" );
+               /* NOTE(review): marks the whole of len as written, not
+                  just the res bytes actually received. */
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, buf_p, len );
+               }
+               break;
+
+            case SYS_RECV:
+               /* int recv(int s, void *buf, int len, unsigned int flags); */
+               /* man 2 recv says:
+               The  recv call is normally used only on a connected socket
+               (see connect(2)) and is identical to recvfrom with a  NULL
+               from parameter.
+               */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recv(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recv(buf)", 
+                                 ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res >= 0 
+                                   && ((UInt*)arg2)[1] != (UInt)NULL) {
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               }
+               break;
+
+            case SYS_CONNECT:
+               /* int connect(int sockfd, 
+                              struct sockaddr *serv_addr, int addrlen ); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(serv_addr.sa_family)",
+                                 ((UInt*)arg2)[1], /* serv_addr */
+                                 sizeof (sa_family_t));
+               pre_mem_read_sockaddr( tst,
+                  "socketcall.connect(serv_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SETSOCKOPT:
+               /* int setsockopt(int s, int level, int optname, 
+                                 const void *optval, int optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(optval)",
+                                 ((UInt*)arg2)[3], /* optval */
+                                 ((UInt*)arg2)[4]  /* optlen */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_GETSOCKOPT:
+               /* int setsockopt(int s, int level, int optname, 
+                                 void *optval, socklen_t *optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               {
+               Addr optval_p  = ((UInt*)arg2)[3];
+               Addr optlen_p  = ((UInt*)arg2)[4];
+               /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
+               buf_and_len_pre_check ( tst, optval_p, optlen_p,
+                                       "socketcall.getsockopt(optval)",
+                                       "socketcall.getsockopt(optlen)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, optval_p, optlen_p,
+                                        "socketcall.getsockopt(optlen_out)" );
+               }
+               break;
+
+            case SYS_GETSOCKNAME:
+               /* int getsockname(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockname(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getsockname(name)",
+                                       "socketcall.getsockname(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getsockname(namelen_out)" );
+               }
+               break;
+
+            case SYS_GETPEERNAME:
+               /* int getpeername(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getpeername(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getpeername(name)",
+                                       "socketcall.getpeername(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getpeername(namelen_out)" );
+               }
+               break;
+
+            case SYS_SHUTDOWN:
+               /* int shutdown(int s, int how); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.shutdown(args)", 
+                                            arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SENDMSG:
+               {
+                  /* int sendmsg(int s, const struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.sendmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_read_sendmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+
+            case SYS_RECVMSG:
+               {
+                  /* int recvmsg(int s, struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.recvmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_write_recvmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)( res ) )
+                     msghdr_foreachfield( tst, msg, post_mem_write_recvmsg );
+
+                  break;
+               }
+
+            default:
+               VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         break;
+
+      case __NR_stat: /* syscall 106 */
+         /* int stat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("stat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_statfs: /* syscall 99 */
+         /* int statfs(const char *path, struct statfs *buf); */
+         MAYBE_PRINTF("statfs ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "statfs(path)", arg1 );
+         /* Fixed: the tracking label said "stat(buf)" (copy-paste from
+            the stat wrapper); report the correct syscall name. */
+         SYSCALL_TRACK( pre_mem_write, tst, "statfs(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      case __NR_symlink: /* syscall 83 */
+         /* int symlink(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("symlink ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+
+#     if defined(__NR_stat64)
+      case __NR_stat64: /* syscall 195 */
+         /* int stat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("stat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+#     if defined(__NR_fstat64)
+      case __NR_fstat64: /* syscall 197 */
+         /* int fstat64(int filedes, struct stat64 *buf); */
+         MAYBE_PRINTF("fstat64 ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+      case __NR_sysinfo: /* syscall 116 */
+         /* int sysinfo(struct sysinfo *info); */
+         MAYBE_PRINTF("sysinfo ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "sysinfo(info)", arg1, sizeof(struct sysinfo) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct sysinfo) );
+         break;
+
+      case __NR_time: /* syscall 13 */
+         /* time_t time(time_t *t); */
+         MAYBE_PRINTF("time ( %p )\n",arg1);
+         /* t is optional; only track when non-NULL. */
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_write, tst, "time", arg1, sizeof(time_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(time_t) );
+         }
+         break;
+
+      case __NR_times: /* syscall 43 */
+         /* clock_t times(struct tms *buf); */
+         MAYBE_PRINTF("times ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "times(buf)", arg1, sizeof(struct tms) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct tms) );
+         }
+         break;
+
+      case __NR_truncate: /* syscall 92 */
+         /* int truncate(const char *path, size_t length); */
+         MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_umask: /* syscall 60 */
+         /* mode_t umask(mode_t mask); */
+         MAYBE_PRINTF("umask ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_unlink: /* syscall 10 */
+         /* int unlink(const char *pathname) */
+         /* Fixed typo in the trace message: "ulink" -> "unlink". */
+         MAYBE_PRINTF("unlink ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "unlink(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_uname: /* syscall 122 */
+         /* int uname(struct utsname *buf); */
+         MAYBE_PRINTF("uname ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "uname(buf)", arg1, sizeof(struct utsname) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct utsname) );
+         }
+         break;
+
+      case __NR_utime: /* syscall 30 */
+         /* int utime(const char *filename, struct utimbuf *buf); */
+         MAYBE_PRINTF("utime ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "utime(filename)", arg1 );
+         /* buf == NULL means "set to current time"; nothing to read. */
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "utime(buf)", arg2, 
+                                                 sizeof(struct utimbuf) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_wait4: /* syscall 114 */
+         /* pid_t wait4(pid_t pid, int *status, int options,
+                        struct rusage *rusage) */
+         MAYBE_PRINTF("wait4 ( %d, %p, %d, %p )\n",
+                      arg1,arg2,arg3,arg4);
+         /* status and rusage are both optional out-params. */
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(status)", arg2, sizeof(int) );
+         if (arg4 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(rusage)", arg4, 
+                              sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            if (arg2 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg2, sizeof(int) );
+            if (arg4 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg4, sizeof(struct rusage) );
+         }
+         break;
+
+      case __NR_writev: { /* syscall 146 */
+         /* int writev(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "writev(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         /* Unlike readv, every iovec is only read, so no post tracking. */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_read, tst, "writev(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+      }
+
+      /*-------------------------- SIGNALS --------------------------*/
+
+      /* Normally set to 1, so that Valgrind's signal-simulation machinery
+         is engaged.  Sometimes useful to disable (set to 0), for
+         debugging purposes, to make clients more deterministic. */
+#     define SIGNAL_SIMULATION 1
+
+      case __NR_sigaltstack: /* syscall 186 */
+         /* int sigaltstack(const stack_t *ss, stack_t *oss); */
+         MAYBE_PRINTF("sigaltstack ( %p, %p )\n",arg1,arg2);
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaltstack(ss)", 
+                              arg1, sizeof(vki_kstack_t) );
+         }
+         if (arg2 != (UInt)NULL) {
+            /* Fixed: this branch tested arg2 but passed arg1 to the
+               check, and labelled it "ss".  oss (arg2) is the buffer the
+               kernel writes the old stack description into. */
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaltstack(oss)", 
+                              arg2, sizeof(vki_kstack_t) );
+         }
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigaltstack) (tid);
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(vki_kstack_t));
+         break;
+
+      case __NR_rt_sigaction:
+      case __NR_sigaction:
+         /* int sigaction(int signum, struct k_sigaction *act, 
+                                      struct k_sigaction *oldact); */
+         MAYBE_PRINTF("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaction(act)", 
+                              arg2, sizeof(vki_ksigaction));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaction(oldact)", 
+                              arg3, sizeof(vki_ksigaction));
+         /* We do this one ourselves! */
+#        if SIGNAL_SIMULATION
+         /* Route through Valgrind's signal machinery instead of the
+            kernel; the result comes back in the client's %eax. */
+         VG_(do__NR_sigaction)(tid);
+         res = tst->m_eax;
+#        else
+         /* debugging signals; when we don't handle them. */
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigaction));
+         break;
+
+      case __NR_rt_sigprocmask:
+      case __NR_sigprocmask:
+         /* int sigprocmask(int how, k_sigset_t *set, 
+                                     k_sigset_t *oldset); */
+         MAYBE_PRINTF("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigprocmask(set)", 
+                              arg2, sizeof(vki_ksigset_t));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigprocmask(oldset)", 
+                              arg3, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigprocmask) ( tid, 
+                                   arg1 /*how*/, 
+                                   (vki_ksigset_t*) arg2,
+                                   (vki_ksigset_t*) arg3 );
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigset_t));
+         break;
+      case __NR_sigpending: /* syscall 73 */
+#     if defined(__NR_rt_sigpending)
+      case __NR_rt_sigpending: /* syscall 176 */
+#     endif
+         /* int sigpending( sigset_t *set ) ; */
+         MAYBE_PRINTF( "sigpending ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sigpending(set)", 
+                           arg1, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do_sigpending)( tid, (vki_ksigset_t*)arg1 );
+         res = 0;
+	 SET_EAX(tid, res);
+#        else
+         KERNEL_DO_SYSCALL(tid, res);
+#        endif
+         if ( !VG_( is_kerror )( res ) && res == 0 )
+            VG_TRACK( post_mem_write, arg1, sizeof( vki_ksigset_t ) ) ;
+         break ;
+
+      default:
+         VG_(message)
+            (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno);
+         VG_(message)
+            (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
+         VG_(message)
+            (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+         VG_(unimplemented)("no wrapper for the above system call");
+         vg_assert(3+3 == 7);
+         break; /*NOTREACHED*/
+   }
+
+   /* { void zzzmemscan(void); zzzmemscan(); } */
+
+   /* Do any post-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+
+/* Perform pre-actions for a blocking syscall, but do not do the
+   syscall itself.
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+*/
+void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+   void*        pre_res = 0;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       read ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "read(buf)", arg2, arg3 );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       write ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "write(buf)", arg2, arg3 );
+         break;
+
+      default:
+         VG_(printf)("pre_known_blocking_syscall: unexpected %d\n", syscallno);
+         VG_(panic)("pre_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+   VGP_POPCC(VgpCoreSysWrap);
+
+   return pre_res;      /* 0 if SK_(pre_syscall)() not called */
+}
+
+
+/* Perform post-actions for a blocking syscall, but do not do the
+   syscall itself.  
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+*/
+void VG_(post_known_blocking_syscall) ( ThreadId tid,
+                                        Int syscallno,
+                                        void* pre_res,
+                                        Int res )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       read ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       write ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         break;
+
+      default:
+         VG_(printf)("post_known_blocking_syscall: unexpected %d\n", 
+                     syscallno);
+         VG_(panic)("post_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                         vg_syscall_mem.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index 179c059..0447d8f 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -40,12 +40,12 @@
 #define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
 #define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 #define newTemp   VG_(getNewTemp)
 #define uLiteral  VG_(setLiteralField)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Here so it can be inlined everywhere.                ---*/
@@ -66,21 +66,6 @@
    return SHADOW(t);
 }
 
-/* Handy predicates. */
-#define SMC_IF_SOME(cb)                              \
-   do {                                              \
-      if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) {   \
-           LAST_UINSTR((cb)).smc_check = True;       \
-      }                                              \
-   } while (0)
-
-#define SMC_IF_ALL(cb)                               \
-   do {                                              \
-      if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) {    \
-         LAST_UINSTR((cb)).smc_check = True;         \
-      }                                              \
-   } while (0)
-
 
 /*------------------------------------------------------------*/
 /*--- Helper bits and pieces for deconstructing the        ---*/
@@ -818,7 +803,6 @@
       }
       if (keep) {
          uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-         SMC_IF_ALL(cb);
       }
       if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
@@ -916,7 +900,6 @@
       Int  tmpv = newTemp(cb);
       uInstr2(cb, GET,   size, ArchReg, gregOfRM(rm), TempReg, tmpv);
       uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
       return HI8(pair)+eip0;
@@ -1113,7 +1096,6 @@
       }
       if (gregOfRM(modrm) < 7) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("%s%c $0x%x, %s\n",
@@ -1201,7 +1183,6 @@
       }
       setFlagsFromUOpcode(cb, uopc);
       uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) {
          if (orig_src_tag == Literal)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1321,7 +1302,6 @@
       /* Dump the result back, if non-BT. */
       if (gregOfRM(modrm) != 4 /* BT */) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1512,7 +1492,6 @@
             uInstr1(cb, NOT, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NOT);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("not%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1520,7 +1499,6 @@
             uInstr1(cb, NEG, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NEG);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1595,13 +1573,11 @@
             uInstr1(cb, INC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1650,7 +1626,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	    uLiteral(cb, eip+1);
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1680,13 +1655,11 @@
             uInstr1(cb, INC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 2: /* call Ev */
             t3 = newTemp(cb); t4 = newTemp(cb);
@@ -1697,7 +1670,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	         uLiteral(cb, eip+HI8(pair));
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1715,7 +1687,6 @@
 	    uLiteral(cb, sz);
             uInstr2(cb, PUT,    4, TempReg, t3,    ArchReg, R_ESP);
             uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t3);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1864,7 +1835,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -1912,7 +1882,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -1996,7 +1965,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -2032,7 +2000,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -2269,7 +2236,6 @@
                Lit16, 
                (((UShort)first_byte) << 8) | ((UShort)second_byte),
                TempReg, ta);
-   if (is_write) SMC_IF_ALL(cb);
    if (dis) {
       if (is_write)
          VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n",
@@ -2485,7 +2451,13 @@
                return dis_fpu_mem(cb, 8, rd, eip, first_byte); 
             case 2: /* FST double-real */
             case 3: /* FSTP double-real */
-               return dis_fpu_mem(cb, 8, wr, eip, first_byte); 
+               return dis_fpu_mem(cb, 8, wr, eip, first_byte);
+            case 4: /* FRSTOR */
+               return dis_fpu_mem(cb, 108, rd, eip, first_byte);
+            case 6: /* FSAVE */
+               return dis_fpu_mem(cb, 108, wr, eip, first_byte);
+            case 7: /* FSTSW */
+               return dis_fpu_mem(cb, 2, wr, eip, first_byte);
             default: 
                goto unhandled;
          }
@@ -2585,7 +2557,6 @@
       uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty);
       uInstr1(cb, POP,   sz, TempReg, t);
       uInstr2(cb, STORE, sz, TempReg, t,      TempReg, ta);
-      SMC_IF_ALL(cb);
       if (dis)
          VG_(printf)("shld%c %%cl, %s, %s\n",
                      nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 
@@ -3010,7 +2981,6 @@
       uInstr2(cb,  ADD, sz, TempReg, tmpd, TempReg, tmpt);
       setFlagsFromUOpcode(cb, ADD);
       uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm));
       if (dis)
          VG_(printf)("xadd%c %s, %s\n", nameISize(sz), 
@@ -3167,7 +3137,6 @@
          uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t2);
 	 uLiteral(cb, eip);
          uInstr2(cb, STORE, 4, TempReg, t2,    TempReg, t1);
-         SMC_IF_ALL(cb);
          uInstr1(cb, JMP,   0, Literal, 0);
 	 uLiteral(cb, d32);
          uCond(cb, CondAlways);
@@ -3472,7 +3441,6 @@
       uInstr2(cb, MOV,    4, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t2);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), 
                            nameIReg(sz,R_EAX), d32);
       break;
@@ -3535,7 +3503,6 @@
          uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1);
 	 uLiteral(cb, d32);
          uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-         SMC_IF_SOME(cb);
          if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
       }
       break;
@@ -3556,6 +3523,10 @@
       eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" );
       break;
 
+   case 0x15: /* ADC Iv, eAX */
+      eip = dis_op_imm_A(cb, sz, ADC, True, eip, "adc" );
+      break;
+
    case 0x1C: /* SBB Ib, AL */
       eip = dis_op_imm_A(cb, 1, SBB, True, eip, "sbb" );
       break;
@@ -3718,40 +3689,7 @@
    case 0x5D: /* POP eBP */
    case 0x5E: /* POP eSI */
    case 0x5F: /* POP eDI */
-    { Int   n_pops;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_pop_case;
-      if (VG_(clo_cachesim)) goto normal_pop_case;
-      /* eip points at first pop insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x58 || ch > 0x5F || ch == 0x5C) break;
-         eipE++;
-      }
-      n_pops = eipE - eipS + 1;
-      if (0 && n_pops > 1) VG_(printf)("%d pops\n", n_pops);
-      t1 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-	 uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t3);
-         uInstr2(cb, PUT,  4, TempReg, t3, ArchReg, ch-0x58);
-         uInstr2(cb, ADD,  4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("popl %s\n", nameIReg(4,ch-0x58));
-      }
-      uInstr2(cb, PUT,    4, TempReg, t1,       ArchReg, R_ESP);
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x5C: /* POP eSP */
-   normal_pop_case:
       t1 = newTemp(cb); t2 = newTemp(cb);
       uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t2);
       uInstr2(cb, LOAD,  sz, TempReg, t2,       TempReg, t1);
@@ -3863,43 +3801,7 @@
    case 0x55: /* PUSH eBP */
    case 0x56: /* PUSH eSI */
    case 0x57: /* PUSH eDI */
-    { Int   n_pushes;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_push_case;
-      if (VG_(clo_cachesim)) goto normal_push_case;
-      /* eip points at first push insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x50 || ch > 0x57 || ch == 0x54) break;
-         eipE++;
-      }
-      n_pushes = eipE - eipS + 1;
-      if (0 && n_pushes > 1) VG_(printf)("%d pushes\n", n_pushes);
-      t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      uInstr2(cb, MOV,    4, TempReg, t1,       TempReg, t2);
-      uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t2);
-      uLiteral(cb, 4 * n_pushes);
-      uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-         uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         uInstr2(cb, GET, 4, ArchReg, ch-0x50, TempReg, t3);
-	 uInstr2(cb, STORE, 4, TempReg, t3, TempReg, t1);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("pushl %s\n", nameIReg(4,ch-0x50));
-      }
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x54: /* PUSH eSP */
-   normal_push_case:
       /* This is the Right Way, in that the value to be pushed is
          established before %esp is changed, so that pushl %esp
          correctly pushes the old value. */
@@ -3911,7 +3813,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
       break;
@@ -3931,7 +3832,6 @@
       uInstr2(cb, MOV,   sz, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t2,    TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c $0x%x\n", nameISize(sz), d32);
       break;
@@ -3948,7 +3848,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("pushf%c\n", nameISize(sz));
       break;
@@ -3980,20 +3879,17 @@
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       /* Push old value of %esp */
       uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
       uLiteral(cb, sz);
       uInstr2(cb, STORE, sz, TempReg,  t3, TempReg, t2);
-      SMC_IF_ALL(cb);
       /* Do %ebp, %esi, %edi */
       for (reg = 5; reg <= 7; reg++) {
          uInstr2(cb, GET,   sz, ArchReg, reg, TempReg, t1);
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("pusha%c\n", nameISize(sz));
@@ -4149,7 +4045,6 @@
          uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1);
          uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2);
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3);
-         SMC_IF_SOME(cb);
          uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm));
          eip += HI8(pair);
          if (dis)
@@ -4231,6 +4126,14 @@
       eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 );
       break;
 
+   case 0xD2: /* Grp2 CL,Eb */
+      modrm = getUChar(eip);
+      am_sz = lengthAMode(eip);
+      d_sz  = 0;
+      sz    = 1;
+      eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX );
+      break;
+
    case 0xD3: /* Grp2 CL,Ev */
       modrm = getUChar(eip);
       am_sz = lengthAMode(eip);
@@ -4499,7 +4402,6 @@
             uCond(cb, (Condcode)(opc-0x90));
             uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis) VG_(printf)("set%s %s\n", 
                                  VG_(nameCondcode)(opc-0x90), 
                                  dis_buf);
@@ -4568,10 +4470,11 @@
    if (dis)
       VG_(printf)("\n");
    for (; first_uinstr < cb->used; first_uinstr++) {
-      Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]);
-      if (dis || !sane) 
-         VG_(ppUInstr)(sane ? first_uinstr : -1,
-                       &cb->instrs[first_uinstr]);
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[first_uinstr]);
+      if (dis) 
+         VG_(ppUInstr)(first_uinstr, &cb->instrs[first_uinstr]);
+      else if (!sane)
+         VG_(ppUInstr)(-1, &cb->instrs[first_uinstr]);
       vg_assert(sane);
    }
 
@@ -4588,28 +4491,17 @@
    Addr eip   = eip0;
    Bool isEnd = False;
    Bool block_sane;
-   Int INCEIP_allowed_lag = 4;
    Int delta = 0;
 
-   if (dis) VG_(printf)("\n");
+   if (dis) VG_(printf)("Original x86 code to UCode:\n\n");
 
-   /* When cache simulating, to ensure cache misses are attributed to the
-    * correct line we ensure EIP is always correct.   This is done by:
+   /* After every x86 instruction do an INCEIP, except for the final one
+    * in the basic block.  For them we patch in the x86 instruction size 
+    * into the `extra4b' field of the basic-block-ending JMP. 
     *
-    * a) Using eager INCEIP updating to cope with all instructions except those
-    *    at the end of a basic block.
-    *
-    * b) Patching in the size of the original x86 instr in the `extra4b' field
-    *    of JMPs at the end of a basic block.  Two cases:
-    *       - Jcond followed by Juncond:  patch the Jcond
-    *       - Juncond alone:              patch the Juncond
-    *
-    * See vg_cachesim_instrument() for how this is used. 
+    * The INCEIPs and JMP.extra4b fields allows a skin to track x86
+    * instruction sizes, important for some skins (eg. cache simulation).
     */
-   if (VG_(clo_cachesim)) {
-       INCEIP_allowed_lag = 0;
-   }
-
    if (VG_(clo_single_step)) {
       eip = disInstr ( cb, eip, &isEnd );
 
@@ -4620,15 +4512,17 @@
          uInstr1(cb, JMP, 0, Literal, 0);
          uLiteral(cb, eip);
          uCond(cb, CondAlways);
+         /* Print added JMP */
          if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
       }
+      if (dis) VG_(printf)("\n");
       delta = eip - eip0;
 
    } else {
       Addr eip2;
       while (!isEnd) {
          eip2 = disInstr ( cb, eip, &isEnd );
-         delta += (eip2 - eip);
+         delta = (eip2 - eip);
          eip = eip2;
          /* Split up giant basic blocks into pieces, so the
             translations fall within 64k. */
@@ -4639,27 +4533,23 @@
             uInstr1(cb, JMP, 0, Literal, 0);
             uLiteral(cb, eip);
             uCond(cb, CondAlways);
+            /* Print added JMP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
             isEnd = True;
 
-         } else if (delta > INCEIP_allowed_lag && !isEnd) {
+         } else if (!isEnd) {
             uInstr1(cb, INCEIP, 0, Lit16, delta);
+            /* Print added INCEIP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
-            delta = 0;
          }
          if (dis) VG_(printf)("\n");
       }
    }
-   if (VG_(clo_cachesim)) {
-      /* Patch instruction size into earliest JMP. */
-      if (cb->used >= 2 && JMP == cb->instrs[cb->used - 2].opcode) {
-         cb->instrs[cb->used - 2].extra4b = delta;
-      } else {
-         LAST_UINSTR(cb).extra4b = delta;
-      }
-   }
 
-   block_sane = VG_(saneUCodeBlock)(cb);
+   /* Patch instruction size into final JMP. */
+   LAST_UINSTR(cb).extra4b = delta;
+
+   block_sane = VG_(saneUCodeBlockCalls)(cb);
    if (!block_sane) {
       VG_(ppUCodeBlock)(cb, "block failing sanity check");
       vg_assert(block_sane);
@@ -4668,6 +4558,7 @@
    return eip - eip0;
 }
 
+#undef dis
 
 /*--------------------------------------------------------------------*/
 /*--- end                                            vg_to_ucode.c ---*/
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 68d9faf..cd52c65 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -26,79 +26,20 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-
 /*------------------------------------------------------------*/
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
-#define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
-#define uLiteral  VG_(setLiteralField)
-#define newTemp   VG_(getNewTemp)
-#define newShadow VG_(getNewShadow)
 
-
-/*------------------------------------------------------------*/
-/*--- Memory management for the translater.                ---*/
-/*------------------------------------------------------------*/
-
-#define N_JITBLOCKS    4
-#define N_JITBLOCK_SZ  5000
-
-static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
-static Bool  jitstorage_inuse[N_JITBLOCKS];
-static Bool  jitstorage_initdone = False;
-
-static __inline__ void jitstorage_initialise ( void )
-{
-   Int i;
-   if (jitstorage_initdone) return;
-   jitstorage_initdone = True;
-   for (i = 0; i < N_JITBLOCKS; i++)
-      jitstorage_inuse[i] = False; 
-}
-
-void* VG_(jitmalloc) ( Int nbytes )
-{
-   Int i;
-   jitstorage_initialise();
-   if (nbytes > N_JITBLOCK_SZ) {
-      /* VG_(printf)("too large: %d\n", nbytes); */
-      return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-   }
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (!jitstorage_inuse[i]) {
-         jitstorage_inuse[i] = True;
-         /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
-         return & jitstorage[i][0];
-      }
-   }
-   VG_(panic)("out of slots in vg_jitmalloc\n");
-   return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-}
-
-void VG_(jitfree) ( void* ptr )
-{
-   Int i;
-   jitstorage_initialise();
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (ptr == & jitstorage[i][0]) {
-         vg_assert(jitstorage_inuse[i]);
-         jitstorage_inuse[i] = False;
-         return;
-      }
-   }
-   VG_(free)(VG_AR_PRIVATE, ptr);
-}
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Basics                                               ---*/
@@ -106,7 +47,7 @@
 
 UCodeBlock* VG_(allocCodeBlock) ( void )
 {
-   UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
+   UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
    cb->used = cb->size = cb->nextTemp = 0;
    cb->instrs = NULL;
    return cb;
@@ -115,8 +56,8 @@
 
 void VG_(freeCodeBlock) ( UCodeBlock* cb )
 {
-   if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
-   VG_(free)(VG_AR_PRIVATE, cb);
+   if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
+   VG_(arena_free)(VG_AR_CORE, cb);
 }
 
 
@@ -129,15 +70,15 @@
          vg_assert(cb->size == 0);
          vg_assert(cb->used == 0);
          cb->size = 8;
-         cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
+         cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
       } else {
          Int i;
-         UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, 
+         UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE, 
                                        2 * sizeof(UInstr) * cb->size);
          for (i = 0; i < cb->used; i++)
             instrs2[i] = cb->instrs[i];
          cb->size *= 2;
-         VG_(free)(VG_AR_PRIVATE, cb->instrs);
+         VG_(arena_free)(VG_AR_CORE, cb->instrs);
          cb->instrs = instrs2;
       }
    }
@@ -147,18 +88,20 @@
 
 
 __inline__ 
-void VG_(emptyUInstr) ( UInstr* u )
+void VG_(newNOP) ( UInstr* u )
 {
    u->val1 = u->val2 = u->val3 = 0;
    u->tag1 = u->tag2 = u->tag3 = NoValue;
    u->flags_r = u->flags_w = FlagsEmpty;
    u->jmpkind = JmpBoring;
-   u->smc_check = u->signed_widen = False;
+   u->signed_widen = u->has_ret_val = False;
+   u->regs_live_after = ALL_RREGS_LIVE;
    u->lit32    = 0;
-   u->opcode   = 0;
+   u->opcode   = NOP;
    u->size     = 0;
    u->cond     = 0;
    u->extra4b  = 0;
+   u->argc = u->regparms_n = 0;
 }
 
 
@@ -174,7 +117,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->val3   = val3;
@@ -198,7 +141,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->opcode = opcode;
@@ -218,7 +161,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->opcode = opcode;
    ui->tag1   = tag1;
@@ -234,7 +177,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->opcode = opcode;
    ui->size   = sz;
 }
@@ -252,13 +195,16 @@
 static __inline__ 
 void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
 {
-   dst->cond          = src->cond;
-   dst->extra4b       = src->extra4b;
-   dst->smc_check     = src->smc_check;
-   dst->signed_widen  = src->signed_widen;
-   dst->jmpkind       = src->jmpkind;
-   dst->flags_r       = src->flags_r;
-   dst->flags_w       = src->flags_w;
+   dst->cond            = src->cond;
+   dst->extra4b         = src->extra4b;
+   dst->signed_widen    = src->signed_widen;
+   dst->jmpkind         = src->jmpkind;
+   dst->flags_r         = src->flags_r;
+   dst->flags_w         = src->flags_w;
+   dst->argc            = src->argc;
+   dst->regparms_n      = src->regparms_n;
+   dst->has_ret_val     = src->has_ret_val;
+   dst->regs_live_after = src->regs_live_after;
 }
 
 
@@ -280,44 +226,85 @@
 }
 
 
+/* Set the C call info fields of the most recent uinsn. */
+void  VG_(setCCallFields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
+                            regparms_n, Bool has_ret_val )
+{
+   vg_assert(argc       <  4);
+   vg_assert(regparms_n <= argc);
+   LAST_UINSTR(cb).lit32       = fn;
+   LAST_UINSTR(cb).argc        = argc;
+   LAST_UINSTR(cb).regparms_n  = regparms_n;
+   LAST_UINSTR(cb).has_ret_val = has_ret_val;
+}
+
 Bool VG_(anyFlagUse) ( UInstr* u )
 {
    return (u->flags_r != FlagsEmpty 
            || u->flags_w != FlagsEmpty);
 }
 
-
-
+#if 1
+#  define BEST_ALLOC_ORDER
+#endif
 
 /* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
    register number.  This effectively defines the order in which real
    registers are allocated.  %ebp is excluded since it is permanently
-   reserved for pointing at VG_(baseBlock).  %edi is a general spare
-   temp used for Left4 and various misc tag ops.
+   reserved for pointing at VG_(baseBlock).
 
-   Important!  If you change the set of allocatable registers from
-   %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in various places to match!  
+   Important!  This function must correspond with the value of
+   VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
+   a problem, except the generated code will obviously be worse).
 */
-__inline__ Int VG_(rankToRealRegNo) ( Int rank )
+__inline__ 
+Int VG_(rankToRealRegNum) ( Int rank )
 {
    switch (rank) {
-#     if 1
+#     ifdef BEST_ALLOC_ORDER
       /* Probably the best allocation ordering. */
       case 0: return R_EAX;
       case 1: return R_EBX;
       case 2: return R_ECX;
       case 3: return R_EDX;
       case 4: return R_ESI;
+      case 5: return R_EDI;
 #     else
       /* Contrary; probably the worst.  Helpful for debugging, tho. */
-      case 4: return R_EAX;
-      case 3: return R_EBX;
-      case 2: return R_ECX;
-      case 1: return R_EDX;
-      case 0: return R_ESI;
+      case 5: return R_EAX;
+      case 4: return R_EBX;
+      case 3: return R_ECX;
+      case 2: return R_EDX;
+      case 1: return R_ESI;
+      case 0: return R_EDI;
 #     endif
-      default: VG_(panic)("rankToRealRegNo");
+      default: VG_(panic)("VG_(rankToRealRegNum)");
+   }
+}
+
+/* Convert an Intel register number into a rank in the range 0 ..
+   VG_MAX_REALREGS-1.  See related comments for rankToRealRegNum()
+   above.  */
+__inline__
+Int VG_(realRegNumToRank) ( Int realReg )
+{
+   switch (realReg) {
+#     ifdef BEST_ALLOC_ORDER
+      case R_EAX: return 0;
+      case R_EBX: return 1;
+      case R_ECX: return 2;
+      case R_EDX: return 3;
+      case R_ESI: return 4;
+      case R_EDI: return 5;
+#     else
+      case R_EAX: return 5;
+      case R_EBX: return 4;
+      case R_ECX: return 3;
+      case R_EDX: return 2;
+      case R_ESI: return 1;
+      case R_EDI: return 0;
+#     endif
+      default: VG_(panic)("VG_(realRegNumToRank)");
    }
 }
 
@@ -382,78 +369,62 @@
    from the result of register allocation on the ucode efficiently and
    without need of any further RealRegs.
 
-   Restrictions on insns (as generated by the disassembler) are as
-   follows:
-
-      A=ArchReg   S=SpillNo   T=TempReg   L=Literal   R=RealReg
-      N=NoValue
-
-         GETF       T       N       N
-         PUTF       T       N       N
-
-         GET        A,S     T       N
-         PUT        T       A,S     N
-         LOAD       T       T       N
-         STORE      T       T       N
-         MOV        T,L     T       N
-         CMOV       T       T       N
-         WIDEN      T       N       N
-         JMP        T,L     N       N
-         CALLM      L       N       N
-         CALLM_S    N       N       N
-         CALLM_E    N       N       N
-         CCALL_1_0  T       N       N
-         CCALL_2_0  T       T       N
-         PUSH,POP   T       N       N
-         CLEAR      L       N       N
-
-         AND, OR
-                    T       T       N
-
-         ADD, ADC, XOR, SUB, SBB
-                    A,L,T   T       N
-
-         SHL, SHR, SAR, ROL, ROR, RCL, RCR
-                    L,T     T       N
-
-         NOT, NEG, INC, DEC, CC2VAL, BSWAP
-                    T       N       N
-
-         JIFZ       T       L       N
-
-         FPU_R      L       T       N
-         FPU_W      L       T       N
-         FPU        L       T       N
-
-         LEA1       T       T   (const in a seperate field)
-         LEA2       T       T       T   (const & shift ditto)
-
-         INCEIP     L       N       N
+   Restrictions for the individual UInstrs are clear from the checks below.
+   Abbreviations: A=ArchReg   S=SpillNo   T=TempReg   L=Literal
+                  Ls=Lit16    R=RealReg   N=NoValue
  
-   and for instrumentation insns:
-
-         LOADV      T       T       N
-         STOREV     T,L     T       N
-         GETV       A       T       N
-         PUTV       T,L     A       N
-         GETVF      T       N       N
-         PUTVF      T       N       N
-         WIDENV     T       N       N
-         TESTV      A,T     N       N
-         SETV       A,T     N       N
-         TAG1       T       N       N
-         TAG2       T       T       N
-
    Before register allocation, S operands should not appear anywhere.
    After register allocation, all T operands should have been
    converted into Rs, and S operands are allowed in GET and PUT --
    denoting spill saves/restores.  
 
+   Before liveness analysis, save_e[acd]x fields should all be True.
+   Afterwards, they may be False.
+
    The size field should be 0 for insns for which it is meaningless,
    ie those which do not directly move/operate on data.
 */
-Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
+Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
 {
+#  define LIT0 (u->lit32 == 0)
+#  define LIT1 (!(LIT0))
+#  define LITm (u->tag1 == Literal ? True : LIT0 )
+#  define SZ4  (u->size == 4)
+#  define SZ2  (u->size == 2)
+#  define SZ1  (u->size == 1)
+#  define SZ0  (u->size == 0)
+#  define SZ42 (u->size == 4 || u->size == 2)
+#  define SZi  (u->size == 4 || u->size == 2 || u->size == 1)
+#  define SZf  (  u->size ==  4 || u->size ==  8 || u->size ==   2     \
+               || u->size == 10 || u->size == 28 || u->size == 108)
+#  define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
+                      ? (u->size == 4) : True)
+
+/* For these ones, two cases:
+ *
+ * 1. They are transliterations of the corresponding x86 instruction, in
+ *    which case they should have its flags (except that redundant write
+ *    flags can be annulled by the optimisation pass).
+ *
+ * 2. They are being used generally for other purposes, eg. helping with a
+ *    'rep'-prefixed instruction, in which case should have empty flags .
+ */
+#  define emptyR (u->flags_r == FlagsEmpty)
+#  define emptyW (u->flags_w == FlagsEmpty)
+#  define CC0 (emptyR && emptyW)
+#  define CCr (u->flags_r == FlagsALL && emptyW)
+#  define CCw (emptyR &&  u->flags_w == FlagsALL)
+#  define CCa (emptyR && (u->flags_w == FlagsOSZACP  || emptyW))
+#  define CCc (emptyR && (u->flags_w == FlagsOC      || emptyW))
+#  define CCe (emptyR && (u->flags_w == FlagsOSZAP   || emptyW))
+#  define CCb ((u->flags_r==FlagC       || emptyR) && \
+               (u->flags_w==FlagsOSZACP || emptyW))
+#  define CCd ((u->flags_r==FlagC   || emptyR) && \
+               (u->flags_w==FlagsOC || emptyW))
+#  define CCf (CC0 || CCr || CCw)
+#  define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
+#  define CCj (u->cond==CondAlways ? CC0 : CCg)
+
 #  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
 #  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
 #  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
@@ -466,24 +437,29 @@
 #  define L2  (u->tag2 == Literal && u->val2 == 0)
 #  define Ls1 (u->tag1 == Lit16)
 #  define Ls3 (u->tag3 == Lit16)
+#  define TRL1 (TR1 || L1)
+#  define TRAL1 (TR1 || A1 || L1)
 #  define N1  (u->tag1 == NoValue)
 #  define N2  (u->tag2 == NoValue)
 #  define N3  (u->tag3 == NoValue)
-#  define SZ4 (u->size == 4)
-#  define SZ2 (u->size == 2)
-#  define SZ1 (u->size == 1)
-#  define SZ0 (u->size == 0)
-#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
-#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
-#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
-#  define FLG_RD_WR_MAYBE                                         \
-       ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)    \
-        || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP)   \
-        || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
-#  define CC1 (!(CC0))
-#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
-                      ? (u->size == 4) : True)
 
+#  define COND0    (u->cond         == 0)
+#  define EXTRA4b0 (u->extra4b      == 0)
+#  define SG_WD0   (u->signed_widen == 0)
+#  define JMPKIND0 (u->jmpkind      == 0)
+#  define CCALL0   (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
+                    ( beforeLiveness                                       \
+                    ? u->regs_live_after == ALL_RREGS_LIVE                 \
+                    : True ))
+
+#  define XCONDi   (         EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+#  define Xextra4b (COND0             && SG_WD0 && JMPKIND0 && CCALL0)
+#  define XWIDEN   (COND0                       && JMPKIND0 && CCALL0)
+#  define XJMP     (                     SG_WD0             && CCALL0)
+#  define XCCALL   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0          )
+#  define XOTHER   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+
+   /* 0 or 1 Literal args per UInstr */
    Int n_lits = 0;
    if (u->tag1 == Literal) n_lits++;
    if (u->tag2 == Literal) n_lits++;
@@ -491,94 +467,94 @@
    if (n_lits > 1) 
       return False;
 
+   /* Fields not checked: val1, val2, val3 */
+
    switch (u->opcode) {
-      case GETF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
-      case PUTF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
-      case CALLM_S: case CALLM_E:
-         return SZ0 && N1 && N2 && N3;
-      case INCEIP:
-         return SZ0 && CC0 && Ls1 && N2 && N3;
-      case LEA1:
-         return CC0 && TR1 && TR2 && N3 && SZ4;
-      case LEA2:
-         return CC0 && TR1 && TR2 && TR3 && SZ4;
-      case NOP: 
-         return SZ0 && CC0 && N1 && N2 && N3;
-      case GET: 
-         return CC0 && AS1 && TR2 && N3;
-      case PUT: 
-         return CC0 && TR1 && AS2 && N3;
-      case LOAD: case STORE: 
-         return CC0 && TR1 && TR2 && N3;
-      case MOV:
-         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
-      case CMOV:
-         return CC1 && TR1 && TR2 && N3 && SZ4;
-      case JMP: 
-         return (u->cond==CondAlways ? CC0 : CC1)
-                && (TR1 || L1) && N2 && SZ0 && N3;
-      case CLEAR:
-         return CC0 && Ls1 && N2 && SZ0 && N3;
-      case CALLM:
-         return SZ0 && Ls1 && N2 && N3;
-      case CCALL_1_0:
-         return SZ0 && CC0 && TR1 && N2 && N3;
-      case CCALL_2_0:
-         return SZ0 && CC0 && TR1 && TR2 && N3;
-      case PUSH: case POP:
-         return CC0 && TR1 && N2 && N3;
-      case AND: case OR:
-         return TR1 && TR2 && N3;
-      case ADD: case ADC: case XOR: case SUB: case SBB:
-         return (A1 || TR1 || L1) && TR2 && N3;
-      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
-         return       (TR1 || L1) && TR2 && N3;
-      case NOT: case NEG: case INC: case DEC:
-         return        TR1 && N2 && N3;
-      case BSWAP:
-         return TR1 && N2 && N3 && CC0 && SZ4;
-      case CC2VAL: 
-         return CC1 && SZ1 && TR1 && N2 && N3;
-      case JIFZ:
-         return CC0 && SZ4 && TR1 && L2 && N3;
-      case FPU_R:  case FPU_W: 
-         return CC0 && Ls1 && TR2 && N3;
-      case FPU: 
-         return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
-      case LOADV:
-         return CC0 && TR1 && TR2 && N3;
-      case STOREV:
-         return CC0 && (TR1 || L1) && TR2 && N3;
-      case GETV: 
-         return CC0 && A1 && TR2 && N3;
-      case PUTV: 
-         return CC0 && (TR1 || L1) && A2 && N3;
-      case GETVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case PUTVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case WIDEN:
-         return CC0 && TR1 && N2 && N3;
-      case TESTV: 
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case SETV:
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case TAG1:
-         return CC0 && TR1 && N2 && Ls3 && SZ0;
-      case TAG2:
-         return CC0 && TR1 && TR2 && Ls3 && SZ0;
-      default: 
-         VG_(panic)("vg_saneUInstr: unhandled opcode");
+
+   /* Fields checked: lit32   size  flags_r/w tag1   tag2   tag3    (rest) */
+   case NOP:    return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case GETF:   return LIT0 && SZ42 && CCr &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case PUTF:   return LIT0 && SZ42 && CCw &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case GET:    return LIT0 && SZi  && CC0 &&  AS1 && TR2 &&  N3 && XOTHER;
+   case PUT:    return LIT0 && SZi  && CC0 &&  TR1 && AS2 &&  N3 && XOTHER;
+   case LOAD: 
+   case STORE:  return LIT0 && SZi  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case MOV:    return LITm && SZ4m && CC0 && TRL1 && TR2 &&  N3 && XOTHER;
+   case CMOV:   return LIT0 && SZ4  && CCg &&  TR1 && TR2 &&  N3 && XCONDi;
+   case WIDEN:  return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XWIDEN;
+   case JMP:    return LITm && SZ0  && CCj && TRL1 &&  N2 &&  N3 && XJMP;
+   case CALLM:  return LIT0 && SZ0 /*any*/ &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CALLM_S: 
+   case CALLM_E:return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case PUSH: 
+   case POP:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CLEAR:  return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case AND:
+   case OR:     return LIT0 && SZi  && CCa &&  TR1 && TR2 &&  N3 && XOTHER;
+   case ADD:
+   case XOR:
+   case SUB:    return LITm && SZi  && CCa &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SBB:
+   case ADC:    return LITm && SZi  && CCb &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SHL:
+   case SHR:
+   case SAR:    return LITm && SZi  && CCa && TRL1 && TR2 &&  N3 && XOTHER;
+   case ROL:
+   case ROR:    return LITm && SZi  && CCc && TRL1 && TR2 &&  N3 && XOTHER;
+   case RCL:
+   case RCR:    return LITm && SZi  && CCd && TRL1 && TR2 &&  N3 && XOTHER;
+   case NOT:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case NEG:    return LIT0 && SZi  && CCa &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case INC:
+   case DEC:    return LIT0 && SZi  && CCe &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CC2VAL: return LIT0 && SZ1  && CCg &&  TR1 &&  N2 &&  N3 && XCONDi;
+   case BSWAP:  return LIT0 && SZ4  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case JIFZ:   return LIT1 && SZ4  && CC0 &&  TR1 &&  L2 &&  N3 && XOTHER;
+   case FPU_R:  
+   case FPU_W:  return LIT0 && SZf  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
+   case FPU:    return LIT0 && SZ0  && CCf &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case LEA1:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case LEA2:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 && TR3 && Xextra4b;
+   case INCEIP: return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CCALL:  return LIT1 && SZ0  && CC0 && 
+                       (u->argc > 0                   ? TR1 : N1) && 
+                       (u->argc > 1                   ? TR2 : N2) && 
+                       (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
+                       u->regparms_n <= u->argc && XCCALL;
+   default: 
+      if (VG_(needs).extended_UCode)
+         return SK_(saneExtUInstr)(beforeRA, beforeLiveness, u);
+      else {
+         VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                     "VG_(needs).extended_UCode should be set?",
+                     u->opcode);
+         VG_(panic)("VG_(saneUInstr): unhandled opcode");
+      }
    }
-#  undef SZ4_IF_TR1
-#  undef CC0
-#  undef CC1
+#  undef LIT0
+#  undef LIT1
+#  undef LITm
 #  undef SZ4
 #  undef SZ2
 #  undef SZ1
 #  undef SZ0
+#  undef SZ42
+#  undef SZi
+#  undef SZf
+#  undef SZ4m
+#  undef emptyR
+#  undef emptyW
+#  undef CC0
+#  undef CCr
+#  undef CCw
+#  undef CCa
+#  undef CCb
+#  undef CCc
+#  undef CCd
+#  undef CCe
+#  undef CCf
+#  undef CCg
+#  undef CCj
 #  undef TR1
 #  undef TR2
 #  undef TR3
@@ -588,20 +564,42 @@
 #  undef AS2
 #  undef AS3
 #  undef L1
-#  undef Ls1
 #  undef L2
+#  undef Ls1
 #  undef Ls3
+#  undef TRL1
+#  undef TRAL1
 #  undef N1
 #  undef N2
 #  undef N3
-#  undef FLG_RD
-#  undef FLG_WR
-#  undef FLG_RD_WR_MAYBE 
+#  undef COND0
+#  undef EXTRA4b0
+#  undef SG_WD0
+#  undef JMPKIND0
+#  undef CCALL0
+#  undef Xextra4b
+#  undef XWIDEN
+#  undef XJMP
+#  undef XCCALL
+#  undef XOTHER
 }
 
+void VG_(saneUCodeBlock) ( UCodeBlock* cb )
+{
+   Int i;
+        
+   for (i = 0; i < cb->used; i++) {
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
+      if (!sane) {
+         VG_(printf)("Instruction failed sanity check:\n");
+         VG_(upUInstr)(i, &cb->instrs[i]);
+      }
+      vg_assert(sane);
+   }
+}
 
 /* Sanity checks to do with CALLMs in UCodeBlocks. */
-Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
+Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
 {
    Int  callm = 0;
    Int  callm_s = 0;
@@ -687,6 +685,9 @@
 /*--- Printing uinstrs.                                    ---*/
 /*------------------------------------------------------------*/
 
+/* Global that dictates whether to print generated code at all stages */
+Bool VG_(print_codegen);
+
 Char* VG_(nameCondcode) ( Condcode cond )
 {
    switch (cond) {
@@ -734,14 +735,14 @@
 }
 
 
-static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
+void VG_(ppUOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
 {
    UInt tag, val;
    switch (operandNo) {
       case 1: tag = u->tag1; val = u->val1; break;
       case 2: tag = u->tag2; val = u->val2; break;
       case 3: tag = u->tag3; val = u->val3; break;
-      default: VG_(panic)("ppUOperand(1)");
+      default: VG_(panic)("VG_(ppUOperand)(1)");
    }
    if (tag == Literal) val = u->lit32;
 
@@ -754,7 +755,7 @@
       case NoValue: VG_(printf)("NoValue"); break;
       case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
       case SpillNo: VG_(printf)("spill%d", val); break;
-      default: VG_(panic)("ppUOperand(2)");
+      default: VG_(panic)("VG_(ppUOperand)(2)");
    }
    if (parens) VG_(printf)(")");
 }
@@ -786,10 +787,6 @@
    }
    if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
    switch (opc) {
-      case GETVF:   return "GETVF";
-      case PUTVF:   return "PUTVF";
-      case TAG1:    return "TAG1";
-      case TAG2:    return "TAG2";
       case CALLM_S: return "CALLM_S";
       case CALLM_E: return "CALLM_E";
       case INCEIP:  return "INCEIP";
@@ -808,8 +805,7 @@
       case JMP:     return "J"    ;
       case JIFZ:    return "JIFZ" ;
       case CALLM:   return "CALLM";
-      case CCALL_1_0: return "CCALL_1_0";
-      case CCALL_2_0: return "CCALL_2_0";
+      case CCALL:   return "CCALL";
       case PUSH:    return "PUSH" ;
       case POP:     return "POP"  ;
       case CLEAR:   return "CLEAR";
@@ -817,18 +813,61 @@
       case FPU_R:   return "FPU_R";
       case FPU_W:   return "FPU_W";
       case FPU:     return "FPU"  ;
-      case LOADV:   return "LOADV";
-      case STOREV:  return "STOREV";
-      case GETV:    return "GETV";
-      case PUTV:    return "PUTV";
-      case TESTV:   return "TESTV";
-      case SETV:    return "SETV";
-      default:      VG_(panic)("nameUOpcode: unhandled case");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(nameExtUOpcode)(opc);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        opc);
+            VG_(panic)("nameUOpcode: unhandled opcode");
+         }
    }
 }
 
+void ppRealRegsLiveness ( UInstr* u )
+{
+#  define PRINT_RREG_LIVENESS(realReg,s) \
+     VG_(printf)( IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), \
+                               u->regs_live_after)             \
+                     ? s : "-");
 
-void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+   VG_(printf)("[");
+   PRINT_RREG_LIVENESS(R_EAX, "a");
+   PRINT_RREG_LIVENESS(R_EBX, "b");
+   PRINT_RREG_LIVENESS(R_ECX, "c");
+   PRINT_RREG_LIVENESS(R_EDX, "d");
+   PRINT_RREG_LIVENESS(R_ESI, "S");
+   PRINT_RREG_LIVENESS(R_EDI, "D");
+   VG_(printf)("]");
+
+#  undef PRINT_RREG_LIVENESS
+}
+
+/* Ugly-print UInstr :) */
+void VG_(upUInstr) ( Int i, UInstr* u )
+{
+   VG_(ppUInstrWithRegs)(i, u);
+   
+   VG_(printf)("opcode:          %d\n", u->opcode);
+   VG_(printf)("lit32:           %x\n", u->lit32);
+   VG_(printf)("size:            %d\n", u->size);
+   VG_(printf)("val1,val2,val3:  %d, %d, %d\n", u->val1, u->val2, u->val3);
+   VG_(printf)("tag1,tag2,tag3:  %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
+   VG_(printf)("flags_r:         %x\n", u->flags_r);
+   VG_(printf)("flags_w:         %x\n", u->flags_w);
+   VG_(printf)("extra4b:         %x\n", u->extra4b);
+   VG_(printf)("cond:            %x\n", u->cond);
+   VG_(printf)("signed_widen:    %d\n", u->signed_widen);
+   VG_(printf)("jmpkind:         %d\n", u->jmpkind);
+   VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
+   VG_(printf)("has_ret_val:     %d\n", u->has_ret_val);
+   VG_(printf)("regs_live_after: ");
+   ppRealRegsLiveness(u);
+   VG_(printf)("\n");
+}
+
+void ppUInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
 {
    VG_(printf)("\t%4d: %s", instrNo, 
                             VG_(nameUOpcode)(True, u->opcode));
@@ -846,24 +885,6 @@
 
    switch (u->opcode) {
 
-      case TAG1:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" )");
-         break;
-
-      case TAG2:
-         VG_(printf)("\t");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" )");
-         break;
-
       case CALLM_S: case CALLM_E:
          break;
 
@@ -873,18 +894,18 @@
 
       case LEA2:
          VG_(printf)("\t%d(" , u->lit32);
-         ppUOperand(u, 1, 4, False);
+         VG_(ppUOperand)(u, 1, 4, False);
          VG_(printf)(",");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          VG_(printf)(",%d), ", (Int)u->extra4b);
-         ppUOperand(u, 3, 4, False);
+         VG_(ppUOperand)(u, 3, 4, False);
          break;
 
       case LEA1:
          VG_(printf)("\t%d" , u->lit32);
-         ppUOperand(u, 1, 4, True);
+         VG_(ppUOperand)(u, 1, 4, True);
          VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          break;
 
       case NOP:
@@ -893,12 +914,12 @@
       case FPU_W:
          VG_(printf)("\t0x%x:0x%x, ",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          break;
 
       case FPU_R:
          VG_(printf)("\t");
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          VG_(printf)(", 0x%x:0x%x",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
@@ -908,97 +929,93 @@
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
 
-      case STOREV: case LOADV:
       case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); 
+         VG_(ppUOperand)(u, 1, u->size, u->opcode==LOAD); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
+         VG_(ppUOperand)(u, 2, u->size, u->opcode==STORE);
+         break;
+
+      case JMP:
+         switch (u->jmpkind) {
+            case JmpCall:      VG_(printf)("-c"); break;
+            case JmpRet:       VG_(printf)("-r"); break;
+            case JmpSyscall:   VG_(printf)("-sys"); break;
+            case JmpClientReq: VG_(printf)("-cli"); break;
+            default: break;
+         }
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, False);
+         if (CondAlways == u->cond) {
+            /* Print x86 instruction size if filled in */
+            if (0 != u->extra4b)
+               VG_(printf)("  ($%u)", u->extra4b);
+         }
          break;
 
       case GETF: case PUTF:
+      case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case JMP: case CC2VAL:
-      case PUSH: case POP: case CLEAR: case CALLM:
-         if (u->opcode == JMP) {
-            switch (u->jmpkind) {
-               case JmpCall:      VG_(printf)("-c"); break;
-               case JmpRet:       VG_(printf)("-r"); break;
-               case JmpSyscall:   VG_(printf)("-sys"); break;
-               case JmpClientReq: VG_(printf)("-cli"); break;
-               default: break;
-            }
+      /* Print a "(s)" after args passed on stack */
+      case CCALL:
+         VG_(printf)("\t");
+         if (u->has_ret_val) {
+            VG_(ppUOperand)(u, 3, 0, False);
+            VG_(printf)(" = ");
          }
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      case CCALL_1_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
-         break;
-
-      case CCALL_2_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
+         VG_(printf)("%p(", u->lit32);
+         if (u->argc > 0) {
+            VG_(ppUOperand)(u, 1, 0, False);
+            if (u->regparms_n < 1)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 1) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 2, 0, False);
+            if (u->regparms_n < 2)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 2) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 3, 0, False);
+            if (u->regparms_n < 3)
+               VG_(printf)("(s)");
+         }
+         VG_(printf)(") ");
          break;
 
       case JIFZ:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case PUTVF: case GETVF:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 0, False); 
-         break;
-
-      case NOT: case NEG: case INC: case DEC: case BSWAP:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
-         break;
-
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
       case SHL: case SHR: case SAR: 
       case ROL: case ROR: case RCL: case RCR:   
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
+         VG_(ppUOperand)(u, 1, u->size, False); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case GETV: case PUTV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
+         VG_(ppUOperand)(u, 2, u->size, False);
          break;
 
       case WIDEN:
          VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                               u->signed_widen?'s':'z');
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case TESTV: case SETV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      default: VG_(panic)("ppUInstr: unhandled opcode");
+      default: 
+         if (VG_(needs).extended_UCode)
+            SK_(ppExtUInstr)(u);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("ppUInstr: unhandled opcode");
+         }
    }
-
    if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
       VG_(printf)("  (");
       if (u->flags_r != FlagsEmpty) 
@@ -1007,16 +1024,31 @@
          vg_ppFlagSet("-w", u->flags_w);
       VG_(printf)(")");
    }
+
+   if (ppRegsLiveness) {
+      VG_(printf)("\t\t");
+      ppRealRegsLiveness ( u );
+   }
+
    VG_(printf)("\n");
 }
 
+void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
+}
+
+void VG_(ppUInstrWithRegs) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
+}
 
 void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
 {
    Int i;
-   VG_(printf)("\n%s\n", title);
+   VG_(printf)("%s\n", title);
    for (i = 0; i < cb->used; i++)
-      if (0 || cb->instrs[i].opcode != NOP)
+      if (cb->instrs[i].opcode != NOP)
          VG_(ppUInstr) ( i, &cb->instrs[i] );
    VG_(printf)("\n");
 }
@@ -1027,43 +1059,35 @@
 /*--- and code improvement.                                ---*/
 /*------------------------------------------------------------*/
 
-/* A structure for communicating temp uses, and for indicating
-   temp->real register mappings for patchUInstr. */
-typedef
-   struct {
-      Int   realNo;
-      Int   tempNo;
-      Bool  isWrite;
-   }
-   TempUse;
-
-
-/* Get the temp use of a uinstr, parking them in an array supplied by
+/* Get the temp/reg use of a uinstr, parking them in an array supplied by
    the caller, which is assumed to be big enough.  Return the number
    of entries.  Insns which read _and_ write a register wind up
    mentioning it twice.  Entries are placed in the array in program
    order, so that if a reg is read-modified-written, it appears first
-   as a read and then as a write.  
+   as a read and then as a write.  'tag' indicates whether we are looking at
+   TempRegs or RealRegs.
 */
-static __inline__ 
-Int getTempUsage ( UInstr* u, TempUse* arr )
+__inline__
+Int VG_(getRegUsage) ( UInstr* u, Tag tag, RegUse* arr )
 {
-
-#  define RD(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = False; n++; }
-#  define WR(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = True; n++; }
+#  define RD(ono)    VG_UINSTR_READS_REG(ono)
+#  define WR(ono)    VG_UINSTR_WRITES_REG(ono)
 
    Int n = 0;
    switch (u->opcode) {
       case LEA1: RD(1); WR(2); break;
       case LEA2: RD(1); RD(2); WR(3); break;
 
-      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
+      case NOP:   case FPU:   case INCEIP: case CALLM_S: case CALLM_E:
+      case CLEAR: case CALLM: break;
+
+      case CCALL:
+         if (u->argc > 0)    RD(1); 
+         if (u->argc > 1)    RD(2); 
+         if (u->argc > 2)    RD(3); 
+         if (u->has_ret_val) WR(3);
+         break;
+
       case FPU_R: case FPU_W: RD(2); break;
 
       case GETF:  WR(1); break;
@@ -1072,16 +1096,14 @@
       case GET:   WR(2); break;
       case PUT:   RD(1); break;
       case LOAD:  RD(1); WR(2); break;
-      case STORE: case CCALL_2_0: RD(1); RD(2); break;
+      case STORE: RD(1); RD(2); break;
       case MOV:   RD(1); WR(2); break;
 
       case JMP:   RD(1); break;
-      case CLEAR: case CALLM: break;
 
-      case PUSH: case CCALL_1_0: RD(1); break;
+      case PUSH: RD(1); break;
       case POP:  WR(1); break;
 
-      case TAG2:
       case CMOV:
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
@@ -1091,7 +1113,7 @@
       case ROL: case ROR: case RCL: case RCR:
          RD(1); RD(2); WR(2); break;
 
-      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          RD(1); WR(1); break;
 
       case WIDEN: RD(1); WR(1); break;
@@ -1099,19 +1121,15 @@
       case CC2VAL: WR(1); break;
       case JIFZ: RD(1); break;
 
-      /* These sizes are only ever consulted when the instrumentation
-         code is being added, so the following can return
-         manifestly-bogus sizes. */
-      case LOADV:   RD(1); WR(2); break;
-      case STOREV:  RD(1); RD(2); break;
-      case GETV:    WR(2); break;
-      case PUTV:    RD(1); break;
-      case TESTV:   RD(1); break;
-      case SETV:    WR(1); break;
-      case PUTVF:   RD(1); break;
-      case GETVF:   WR(1); break;
-
-      default: VG_(panic)("getTempUsage: unhandled opcode");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(getExtRegUsage)(u, tag, arr);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("VG_(getRegUsage): unhandled opcode");
+         }
    }
    return n;
 
@@ -1120,31 +1138,32 @@
 }
 
 
-/* Change temp regs in u into real regs, as directed by tmap. */
-static __inline__ 
-void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
+/* Change temp regs in u into real regs, as directed by the
+ * temps[i]-->reals[i] mapping. */
+static __inline__
+void patchUInstr ( UInstr* u, RegUse temps[], UInt reals[], Int n_tmap )
 {
    Int i;
    if (u->tag1 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val1) break;
+         if (temps[i].num == u->val1) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(1)");
       u->tag1 = RealReg;
-      u->val1 = tmap[i].realNo;
+      u->val1 = reals[i];
    }
    if (u->tag2 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val2) break;
+         if (temps[i].num == u->val2) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(2)");
       u->tag2 = RealReg;
-      u->val2 = tmap[i].realNo;
+      u->val2 = reals[i];
    }
    if (u->tag3 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val3) break;
+         if (temps[i].num == u->val3) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(3)");
       u->tag3 = RealReg;
-      u->val3 = tmap[i].realNo;
+      u->val3 = reals[i];
    }
 }
 
@@ -1166,7 +1185,9 @@
 
 
 /* If u reads an ArchReg, return the number of the containing arch
-   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
+   reg.  Otherwise return -1.  Used in redundant-PUT elimination.
+   Note that this is not required for skins extending UCode because
+   this happens before instrumentation. */
 static __inline__ 
 Int maybe_uinstrReadsArchReg ( UInstr* u )
 {
@@ -1211,10 +1232,10 @@
 Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
 {
    Int i, k;
-   TempUse tempUse[3];
-   k = getTempUsage ( u, &tempUse[0] );
+   RegUse tempUse[3];
+   k = VG_(getRegUsage) ( u, TempReg, &tempUse[0] );
    for (i = 0; i < k; i++)
-      if (tempUse[i].tempNo == tempreg)
+      if (tempUse[i].num == tempreg)
          return True;
    return False;
 }
@@ -1236,14 +1257,18 @@
    Int     i, j, k, m, n, ar, tr, told, actual_areg;
    Int     areg_map[8];
    Bool    annul_put[8];
-   TempUse tempUse[3];
+   RegUse  tempUse[3];
    UInstr* u;
    Bool    wr;
    Int*    last_live_before;
    FlagSet future_dead_flags;
 
+   if (dis) 
+      VG_(printf) ("Improvements:\n");
+
    if (cb->nextTemp > 0)
-      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
+      last_live_before = VG_(arena_malloc) ( VG_AR_JITTER, 
+                                             cb->nextTemp * sizeof(Int) );
    else
       last_live_before = NULL;
 
@@ -1259,11 +1284,11 @@
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      k = getTempUsage(u, &tempUse[0]);
+      k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
       /* For each temp usage ... bwds in program order. */
       for (j = k-1; j >= 0; j--) {
-         tr = tempUse[j].tempNo;
+         tr = tempUse[j].num;
          wr = tempUse[j].isWrite;
          if (last_live_before[tr] == -1) {
             vg_assert(tr >= 0 && tr < cb->nextTemp);
@@ -1300,15 +1325,14 @@
                out here.  Annul this GET, rename tr to told for the
                rest of the block, and extend told's live range to that
                of tr.  */
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
+            VG_(newNOP)(u);
             n = last_live_before[tr] + 1;
             if (n > cb->used) n = cb->used;
             last_live_before[told] = last_live_before[tr];
             last_live_before[tr] = i-1;
-            if (VG_(disassemble))
+            if (dis)
                VG_(printf)(
-                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
+                  "   at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
                   i, tr, told,i+1, n-1);
             for (m = i+1; m < n; m++) {
                if (cb->instrs[m].tag1 == TempReg 
@@ -1349,9 +1373,9 @@
                case ADC: case SBB:
                case SHL: case SHR: case SAR: case ROL: case ROR:
                case RCL: case RCR:
-                  if (VG_(disassemble)) 
+                  if (dis) 
                      VG_(printf)(
-                        "at %d: change ArchReg %S to TempReg t%d\n", 
+                        "   at %2d: change ArchReg %S to TempReg t%d\n", 
                         i, nameIReg(4,u->val1), areg_map[u->val1]);
                   u->tag1 = TempReg;
                   u->val1 = areg_map[u->val1];
@@ -1366,12 +1390,12 @@
          }
 
          /* boring insn; invalidate any mappings to temps it writes */
-         k = getTempUsage(u, &tempUse[0]);
+         k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
          for (j = 0; j < k; j++) {
             wr  = tempUse[j].isWrite;
             if (!wr) continue;
-            tr = tempUse[j].tempNo;
+            tr = tempUse[j].num;
             for (m = 0; m < 8; m++)
                if (areg_map[m] == tr) areg_map[m] = -1;
          }
@@ -1398,10 +1422,9 @@
          actual_areg = containingArchRegOf ( 4, u->val2 );
          if (annul_put[actual_areg]) {
             vg_assert(actual_areg != R_ESP);
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
-            if (VG_(disassemble)) 
-               VG_(printf)("at %d: delete PUT\n", i );
+            VG_(newNOP)(u);
+            if (dis) 
+               VG_(printf)("   at %2d: delete PUT\n", i );
          } else {
             if (actual_areg != R_ESP)
                annul_put[actual_areg] = True;
@@ -1443,9 +1466,9 @@
       vg_assert(u->tag1 == TempReg);
       vg_assert(u->tag2 == TempReg);
       if (last_live_before[u->val1] == i) {
-         if (VG_(disassemble))
+         if (dis)
             VG_(printf)(
-               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
+               "   at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
                i, u->val2, u->val1, i+1, last_live_before[u->val2] );
          for (j = i+1; j <= last_live_before[u->val2]; j++) {
             if (cb->instrs[j].tag1 == TempReg 
@@ -1457,8 +1480,7 @@
          }
          last_live_before[u->val1] = last_live_before[u->val2];
          last_live_before[u->val2] = i-1;
-         u->opcode = NOP;
-         u->tag1 = u->tag2 = NoValue;
+         VG_(newNOP)(u);
       }
    }
 
@@ -1495,8 +1517,8 @@
          this insn.*/
       if (u->flags_w != FlagsEmpty
           && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
-         if (VG_(disassemble)) {
-            VG_(printf)("at %d: annul flag write ", i);
+         if (dis) {
+            VG_(printf)("   at %2d: annul flag write ", i);
             vg_ppFlagSet("", u->flags_w);
             VG_(printf)(" due to later ");
             vg_ppFlagSet("", future_dead_flags);
@@ -1515,7 +1537,12 @@
    }
 
    if (last_live_before) 
-      VG_(jitfree) ( last_live_before );
+      VG_(arena_free) ( VG_AR_JITTER, last_live_before );
+
+   if (dis) {
+      VG_(printf)("\n");
+      VG_(ppUCodeBlock) ( cb, "Improved UCode:" );
+   }
 }
 
 
@@ -1570,7 +1597,8 @@
    Int          ss_busy_until_before[VG_MAX_SPILLSLOTS];
    Int          i, j, k, m, r, tno, max_ss_no;
    Bool         wr, defer, isRead, spill_reqd;
-   TempUse      tempUse[3];
+   UInt         realUse[3];
+   RegUse       tempUse[3];
    UCodeBlock*  c2;
 
    /* Used to denote ... well, "no value" in this fn. */
@@ -1578,7 +1606,8 @@
 
    /* Initialise the TempReg info.  */
    if (c1->nextTemp > 0)
-      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
+      temp_info = VG_(arena_malloc)(VG_AR_JITTER,
+                                    c1->nextTemp * sizeof(TempInfo) );
    else
       temp_info = NULL;
 
@@ -1594,12 +1623,12 @@
    /* Scan fwds to establish live ranges. */
 
    for (i = 0; i < c1->used; i++) {
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each temp usage ... fwds in program order */
       for (j = 0; j < k; j++) {
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          wr  = tempUse[j].isWrite;
          if (wr) {
             /* Writes hold a reg live until after this insn. */
@@ -1662,26 +1691,30 @@
 
    /* Show live ranges and assigned spill slot nos. */
 
-   if (VG_(disassemble)) {
-      VG_(printf)("Live Range Assignments\n");
+   if (dis) {
+      VG_(printf)("Live range assignments:\n");
 
       for (i = 0; i < c1->nextTemp; i++) {
          if (temp_info[i].live_after == VG_NOTHING) 
             continue;
          VG_(printf)(
-            "   LR %d is   after %d to before %d   spillno %d\n",
+            "   LR %d is  after %d to before %d\tspillno %d\n",
             i,
             temp_info[i].live_after,
             temp_info[i].dead_before,
             temp_info[i].spill_no
          );
       }
+      VG_(printf)("\n");
    }
 
    /* Now that we've established a spill slot number for each used
       temporary, we can go ahead and do the core of the "Second-chance
       binpacking" allocation algorithm. */
 
+   if (dis) VG_(printf)("Register allocated UCode:\n");
+      
+
    /* Resulting code goes here.  We generate it all in a forwards
       pass. */
    c2 = VG_(allocCodeBlock)();
@@ -1694,9 +1727,6 @@
    for (i = 0; i < c1->nextTemp; i++)
       temp_info[i].real_no = VG_NOTHING;
 
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-
    /* Process each insn in turn. */
    for (i = 0; i < c1->used; i++) {
 
@@ -1721,14 +1751,14 @@
       }
 #     endif
 
-      if (VG_(disassemble))
+      if (dis)
          VG_(ppUInstr)(i, &c1->instrs[i]);
 
       /* First, free up enough real regs for this insn.  This may
          generate spill stores since we may have to evict some TempRegs
          currently in real regs.  Also generates spill loads. */
 
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each ***different*** temp mentioned in the insn .... */
@@ -1739,14 +1769,14 @@
             used by the insn once, even if it is mentioned more than
             once. */
          defer = False;
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          for (m = j+1; m < k; m++)
-            if (tempUse[m].tempNo == tno) 
+            if (tempUse[m].num == tno) 
                defer = True;
          if (defer) 
             continue;
 
-         /* Now we're trying to find a register for tempUse[j].tempNo.
+         /* Now we're trying to find a register for tempUse[j].num.
             First of all, if it already has a register assigned, we
             don't need to do anything more. */
          if (temp_info[tno].real_no != VG_NOTHING)
@@ -1772,7 +1802,7 @@
 
             Select r in 0 .. VG_MAX_REALREGS-1 such that
             real_to_temp[r] is not mentioned in 
-            tempUse[0 .. k-1].tempNo, since it would be just plain 
+            tempUse[0 .. k-1].num, since it would be just plain 
             wrong to eject some other TempReg which we need to use in 
             this insn.
 
@@ -1783,7 +1813,7 @@
          for (r = 0; r < VG_MAX_REALREGS; r++) {
             is_spill_cand[r] = True;
             for (m = 0; m < k; m++) {
-               if (real_to_temp[r] == tempUse[m].tempNo) {
+               if (real_to_temp[r] == tempUse[m].num) {
                   is_spill_cand[r] = False;
                   break;
                }
@@ -1834,28 +1864,28 @@
          temp_info[real_to_temp[r]].real_no = VG_NOTHING;
          if (temp_info[real_to_temp[r]].dead_before > i) {
             uInstr2(c2, PUT, 4, 
-                        RealReg, VG_(rankToRealRegNo)(r), 
+                        RealReg, VG_(rankToRealRegNum)(r), 
                         SpillNo, temp_info[real_to_temp[r]].spill_no);
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
          /* Decide if tno is read. */
          isRead = False;
          for (m = 0; m < k; m++)
-            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) 
+            if (tempUse[m].num == tno && !tempUse[m].isWrite) 
                isRead = True;
 
          /* If so, generate a spill load. */
          if (isRead) {
             uInstr2(c2, GET, 4, 
                         SpillNo, temp_info[tno].spill_no, 
-                        RealReg, VG_(rankToRealRegNo)(r) );
+                        RealReg, VG_(rankToRealRegNum)(r) );
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
@@ -1869,19 +1899,18 @@
          and use patchUInstr to convert its rTempRegs into
          realregs. */
       for (j = 0; j < k; j++)
-         tempUse[j].realNo 
-            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
+         realUse[j] = VG_(rankToRealRegNum)(temp_info[tempUse[j].num].real_no);
       VG_(copyUInstr)(c2, &c1->instrs[i]);
-      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
+      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
 
-      if (VG_(disassemble)) {
+      if (dis) {
          VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          VG_(printf)("\n");
       }
    }
 
    if (temp_info != NULL)
-      VG_(jitfree)(temp_info);
+      VG_(arena_free)(VG_AR_JITTER, temp_info);
 
    VG_(freeCodeBlock)(c1);
 
@@ -1893,1170 +1922,37 @@
 #  undef VG_NOTHING
 
 }
+extern void fooble(int);
+/* Analysis records liveness of all general-use RealRegs in the UCode. */
+static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
+{        
+   Int      i, j, k;
+   RRegSet  rregs_live;
+   RegUse   regUse[3];
+   UInstr*  u;
 
-
-/*------------------------------------------------------------*/
-/*--- New instrumentation machinery.                       ---*/
-/*------------------------------------------------------------*/
-
-static
-VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveOR4_TQ;
-      case 2: return VgT_ImproveOR2_TQ;
-      case 1: return VgT_ImproveOR1_TQ;
-      default: VG_(panic)("get_VgT_ImproveOR_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveAND4_TQ;
-      case 2: return VgT_ImproveAND2_TQ;
-      case 1: return VgT_ImproveAND1_TQ;
-      default: VG_(panic)("get_VgT_ImproveAND_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_Left ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_Left4;
-      case 2: return VgT_Left2;
-      case 1: return VgT_Left1;
-      default: VG_(panic)("get_VgT_Left");
-   }
-}
-
-
-static
-VgTagOp get_VgT_UifU ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_UifU4;
-      case 2: return VgT_UifU2;
-      case 1: return VgT_UifU1;
-      case 0: return VgT_UifU0;
-      default: VG_(panic)("get_VgT_UifU");
-   }
-}
-
-
-static
-VgTagOp get_VgT_DifD ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_DifD4;
-      case 2: return VgT_DifD2;
-      case 1: return VgT_DifD1;
-      default: VG_(panic)("get_VgT_DifD");
-   }
-}
-
-
-static 
-VgTagOp get_VgT_PCast ( Int szs, Int szd )
-{
-   if (szs == 4 && szd == 0) return VgT_PCast40;
-   if (szs == 2 && szd == 0) return VgT_PCast20;
-   if (szs == 1 && szd == 0) return VgT_PCast10;
-   if (szs == 0 && szd == 1) return VgT_PCast01;
-   if (szs == 0 && szd == 2) return VgT_PCast02;
-   if (szs == 0 && szd == 4) return VgT_PCast04;
-   if (szs == 1 && szd == 4) return VgT_PCast14;
-   if (szs == 1 && szd == 2) return VgT_PCast12;
-   if (szs == 1 && szd == 1) return VgT_PCast11;
-   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
-   VG_(panic)("get_VgT_PCast");
-}
-
-
-static 
-VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
-{
-   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
-   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
-
-   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
-   if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
-
-   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
-   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
-
-   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
-   VG_(panic)("get_VgT_Widen");
-}
-
-/* Pessimally cast the spec'd shadow from one size to another. */
-static 
-void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
-{
-   if (szs == 0 && szd == 0)
-      return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_PCast(szs,szd));
-}
-
-
-/* Create a signed or unsigned widen of the spec'd shadow from one
-   size to another.  The only allowed size transitions are 1->2, 1->4
-   and 2->4. */
-static 
-void create_Widen ( UCodeBlock* cb, Bool signed_widen,
-                    Int szs, Int szd, Int tempreg )
-{
-   if (szs == szd) return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_Widen(signed_widen,szs,szd));
-}
-
-
-/* Get the condition codes into a new shadow, at the given size. */
-static
-Int create_GETVF ( UCodeBlock* cb, Int sz )
-{
-   Int tt = newShadow(cb);
-   uInstr1(cb, GETVF, 0, TempReg, tt);
-   create_PCast(cb, 0, sz, tt);
-   return tt;
-}
-
-
-/* Save the condition codes from the spec'd shadow. */
-static
-void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   if (sz == 0) {
-      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
-   } else { 
-      Int tt = newShadow(cb);
-      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
-      create_PCast(cb, sz, 0, tt);
-      uInstr1(cb, PUTVF, 0, TempReg, tt);
-   }
-}
-
-
-/* Do Left on the spec'd shadow. */
-static 
-void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   uInstr3(cb, TAG1, 0, 
-               TempReg, tempreg,
-               NoValue, 0, 
-               Lit16, get_VgT_Left(sz));
-}
-
-
-/* Do UifU on ts and td, putting the result in td. */
-static 
-void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_UifU(sz));
-}
-
-
-/* Do DifD on ts and td, putting the result in td. */
-static 
-void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_DifD(sz));
-}
-
-
-/* Do HelpAND on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveAND_TQ(sz));
-}
-
-
-/* Do HelpOR on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveOR_TQ(sz));
-}
-
-
-/* Get the shadow for an operand described by (tag, val).  Emit code
-   to do this and return the identity of the shadow holding the
-   result.  The result tag is always copied into a new shadow, so it
-   can be modified without trashing the original.*/
-static
-Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, 
-                                     Int sz, Int tag, Int val )
-{
-   Int sh;
-   sh = newShadow(cb);
-   if (tag == TempReg) {
-      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
-      return sh;
-   }
-   if (tag == Literal) {
-      uInstr1(cb, SETV, sz, TempReg, sh);
-      return sh;
-   }
-   if (tag == ArchReg) {
-      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
-      return sh;
-   }
-   VG_(panic)("getOperandShadow");
-}
-
-
-
-/* Create and return an instrumented version of cb_in.  Free cb_in
-   before returning. */
-static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
-{
-   UCodeBlock* cb;
-   Int         i, j;
-   UInstr*     u_in;
-   Int         qs, qd, qt, qtt;
-   cb = VG_(allocCodeBlock)();
-   cb->nextTemp = cb_in->nextTemp;
-
-   for (i = 0; i < cb_in->used; i++) {
-      qs = qd = qt = qtt = INVALID_TEMPREG;
-      u_in = &cb_in->instrs[i];
-
-      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
-
-      /* VG_(ppUInstr)(0, u_in); */
-      switch (u_in->opcode) {
-
-         case NOP:
-            break;
-
-         case INCEIP:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Loads and stores.  Test the V bits for the address.  24
-            Mar 02: since the address is A-checked anyway, there's not
-            really much point in doing the V-check too, unless you
-            think that you might use addresses which are undefined but
-            still addressible.  Hence the optionalisation of the V
-            check.
-
-            The LOADV/STOREV does an addressibility check for the
-            address. */
-
-         case LOAD: 
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            }
-            uInstr2(cb, LOADV, u_in->size, 
-                        TempReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case STORE:
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV,  4, TempReg, SHADOW(u_in->val2));
-               uInstr1(cb, SETV,   4, TempReg, SHADOW(u_in->val2));
-            }
-            uInstr2(cb, STOREV, u_in->size,
-                        TempReg, SHADOW(u_in->val1), 
-                        TempReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Moving stuff around.  Make the V bits follow accordingly,
-            but don't do anything else.  */
-
-         case GET:
-            uInstr2(cb, GETV, u_in->size,
-                        ArchReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUT:
-            uInstr2(cb, PUTV, u_in->size, 
-                        TempReg, SHADOW(u_in->val1),
-                        ArchReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case GETF:
-            /* This is not the smartest way to do it, but should work. */
-            qd = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUTF:
-            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case MOV:
-            switch (u_in->tag1) {
-               case TempReg: 
-                  uInstr2(cb, MOV, 4,
-                              TempReg, SHADOW(u_in->val1),
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               case Literal: 
-                  uInstr1(cb, SETV, u_in->size, 
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               default: 
-                  VG_(panic)("vg_instrument: MOV");
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Special case of add, where one of the operands is a literal.
-            lea1(t) = t + some literal.
-            Therefore: lea1#(qa) = left(qa) 
-         */
-         case LEA1:
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Another form of add.  
-            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
-                                and is 0,1,2 or 3.
-            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
-            Note, subtly, that the shift puts zeroes at the bottom of qt,
-            meaning Valid, since the corresponding shift of tt puts 
-            zeroes at the bottom of tb.
-         */
-         case LEA2: {
-            Int shift;
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            switch (u_in->extra4b) {
-               case 1: shift = 0; break;
-               case 2: shift = 1; break;
-               case 4: shift = 2; break;
-               case 8: shift = 3; break;
-               default: VG_(panic)( "vg_instrument(LEA2)" );
-            }
-            qs = SHADOW(u_in->val1);
-            qt = SHADOW(u_in->val2);
-            qd = SHADOW(u_in->val3);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
-            if (shift > 0) {
-               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
-               uLiteral(cb, shift);
-            }
-            create_UifU(cb, 4, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
-         case INC: case DEC:
-            qd = SHADOW(u_in->val1);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->flags_w != FlagsEmpty)
-               create_PUTVF(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* This is a HACK (approximation :-) */
-         /* rcl#/rcr#(qs,qd) 
-               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
-                 eflags# = q0
-                 qd =pcast-0-sz(q0)
-            Ie, cast everything down to a single bit, then back up.
-            This assumes that any bad bits infect the whole word and 
-            the eflags.
-         */
-         case RCL: case RCR:
-	    vg_assert(u_in->flags_r != FlagsEmpty);
-            /* The following assertion looks like it makes sense, but is
-               actually wrong.  Consider this:
-                  rcll    %eax
-                  imull   %eax, %eax
-               The rcll writes O and C but so does the imull, so the O and C 
-               write of the rcll is annulled by the prior improvement pass.
-               Noticed by Kevin Ryde <user42@zip.com.au>
-            */
-	    /* vg_assert(u_in->flags_w != FlagsEmpty); */
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            /* We can safely modify qs; cast it to 0-size. */
-            create_PCast(cb, u_in->size, 0, qs);
-            qd = SHADOW(u_in->val2);
-            create_PCast(cb, u_in->size, 0, qd);
-            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
-            create_UifU(cb, 0, qs, qd);
-            /* qs is now free; reuse it for the flag definedness. */
-            qs = create_GETVF(cb, 0);
-            create_UifU(cb, 0, qs, qd);
-            create_PUTVF(cb, 0, qd);
-            create_PCast(cb, 0, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* for OP in shl shr sar rol ror
-            (qs is shift count#, qd is value to be OP#d)
-            OP(ts,td)
-            OP#(qs,qd)
-               = pcast-1-sz(qs) `UifU` OP(ts,qd)
-            So we apply OP to the tag bits too, and then UifU with
-            the shift count# to take account of the possibility of it
-            being undefined.
+   /* All regs are dead at the end of the block */
+   rregs_live = ALL_RREGS_DEAD;
             
-            A bit subtle:
-               ROL/ROR rearrange the tag bits as per the value bits.
-               SHL/SHR shifts zeroes into the value, and corresponding 
-                  zeroes indicating Definedness into the tag.
-               SAR copies the top bit of the value downwards, and therefore
-                  SAR also copies the definedness of the top bit too.
-            So in all five cases, we just apply the same op to the tag 
-            bits as is applied to the value bits.  Neat!
-         */
-         case SHL:
-         case SHR: case SAR:
-         case ROL: case ROR: {
-            Int t_amount = INVALID_TEMPREG;
-            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-
-            /* Make qs hold shift-count# and make
-               t_amount be a TempReg holding the shift count. */
-            if (u_in->tag1 == Literal) {
-               t_amount = newTemp(cb);
-               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
-               uLiteral(cb, u_in->lit32);
-               qs = SHADOW(t_amount);
-               uInstr1(cb, SETV, 1, TempReg, qs);
-            } else {
-               t_amount = u_in->val1;
-               qs = SHADOW(u_in->val1);
-            }
-
-            uInstr2(cb, u_in->opcode, 
-                        u_in->size, 
-                        TempReg, t_amount, 
-                        TempReg, qd);
-            qt = newShadow(cb);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            create_PCast(cb, 1, u_in->size, qt);
-            create_UifU(cb, u_in->size, qt, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* One simple tag operation. */
-         case WIDEN:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, 
-                             SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* not#(x) = x (since bitwise independent) */
-         case NOT:
-            vg_assert(u_in->tag1 == TempReg);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* neg#(x) = left(x) (derivable from case for SUB) */
-         case NEG:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Left(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* bswap#(x) = bswap(x) */
-         case BSWAP:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->size == 4);
-            qd = SHADOW(u_in->val1);
-            uInstr1(cb, BSWAP, 4, TempReg, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
-         case CC2VAL:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            qt = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cmov#(qs,qd) = cmov(qs,qd)
-            That is, do the cmov of tags using the same flags as for
-            the data (obviously).  However, first do a test on the 
-            validity of the flags.
-         */
-         case CMOV:
-            vg_assert(u_in->size == 4);
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            vg_assert(u_in->flags_w == FlagsEmpty);
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            qt = create_GETVF(cb, 0);
-            uInstr1(cb, TESTV, 0, TempReg, qt);
-            /* qt should never be referred to again.  Nevertheless
-               ... */
-            uInstr1(cb, SETV, 0, TempReg, qt);
-
-            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
-            LAST_UINSTR(cb).cond    = u_in->cond;
-            LAST_UINSTR(cb).flags_r = u_in->flags_r;
-
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* add#/sub#(qs,qd) 
-               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
-               = left(qs) `UifU` left(qd)
-               = left(qs `UifU` qd)
-            adc#/sbb#(qs,qd)
-               = left(qs `UifU` qd) `UifU` pcast(eflags#)
-            Second arg (dest) is TempReg.
-            First arg (src) is Literal or TempReg or ArchReg. 
-         */
-         case ADD: case SUB:
-         case ADC: case SBB:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->opcode == ADC || u_in->opcode == SBB) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, u_in->size);
-               create_UifU(cb, u_in->size, qt, qd);
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* xor#(qs,qd) = qs `UifU` qd */
-         case XOR:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* and#/or#(qs,qd) 
-               = (qs `UifU` qd) `DifD` improve(vs,qs) 
-                                `DifD` improve(vd,qd)
-            where improve is the relevant one of
-                Improve{AND,OR}_TQ
-            Use the following steps, with qt as a temp:
-               qt = improve(vd,qd)
-               qd = qs `UifU` qd
-               qd = qt `DifD` qd
-               qt = improve(vs,qs)
-               qd = qt `DifD` qd
-         */
-         case AND: case OR:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-            qs = SHADOW(u_in->val1);
-            qt = newShadow(cb);
-
-            /* qt = improve(vd,qd) */
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
-            /* qd = qs `UifU` qd */
-            create_UifU(cb, u_in->size, qs, qd);
-            /* qd = qt `DifD` qd */
-            create_DifD(cb, u_in->size, qt, qd);
-            /* qt = improve(vs,qs) */
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
-            /* qd = qt `DifD` qd */
-               create_DifD(cb, u_in->size, qt, qd);
-            /* So, finally qd is the result tag. */
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Machinery to do with supporting CALLM.  Copy the start and
-            end markers only to make the result easier to read
-            (debug); they generate no code and have no effect. 
-         */
-         case CALLM_S: case CALLM_E:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Copy PUSH and POP verbatim.  Arg/result absval
-            calculations are done when the associated CALL is
-            processed.  CLEAR has no effect on absval calculations but
-            needs to be copied.  
-         */
-         case PUSH: case POP: case CLEAR:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* In short:
-               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
-            We have to decide on a size to do the computation at,
-            although the choice doesn't affect correctness.  We will
-            do a pcast to the final size anyway, so the only important
-            factor is to choose a size which minimises the total
-            number of casts needed.  Valgrind: just use size 0,
-            regardless.  It may not be very good for performance
-            but does simplify matters, mainly by reducing the number
-            of different pessimising casts which have to be implemented.
-         */
-         case CALLM: {
-            UInstr* uu;
-            Bool res_used;
-
-            /* Now generate the code.  Get the final result absval
-               into qt. */
-            qt  = newShadow(cb);
-            qtt = newShadow(cb);
-            uInstr1(cb, SETV, 0, TempReg, qt);
-            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != PUSH) continue;
-               /* cast via a temporary */
-               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
-                                   TempReg, qtt);
-               create_PCast(cb, uu->size, 0, qtt);
-               create_UifU(cb, 0, qtt, qt);
-            }
-            /* Remembering also that flags read count as inputs. */
-            if (u_in->flags_r != FlagsEmpty) {
-               qtt = create_GETVF(cb, 0);
-               create_UifU(cb, 0, qtt, qt);
-            }
-
-            /* qt now holds the result tag.  If any results from the
-               call are used, either by fetching with POP or
-               implicitly by writing the flags, we copy the result
-               absval to the relevant location.  If not used, the call
-               must have been for its side effects, so we test qt here
-               and now.  Note that this assumes that all values
-               removed by POP continue to be live.  So dead args
-               *must* be removed with CLEAR, not by POPping them into
-               a dummy tempreg. 
-            */
-            res_used = False;
-            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != POP) continue;
-               /* Cast via a temp. */
-               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
-               create_PCast(cb, 0, uu->size, qtt);
-               uInstr2(cb, MOV, 4, TempReg, qtt, 
-                                   TempReg, SHADOW(uu->val1));
-               res_used = True;
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, 0, qt);
-               res_used = True;
-            }
-            if (!res_used) {
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-         /* Whew ... */
-
-         case JMP:
-            if (u_in->tag1 == TempReg) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            } else {
-               vg_assert(u_in->tag1 == Literal);
-            }
-            if (u_in->cond != CondAlways) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, 0);
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case JIFZ:
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Emit a check on the address used.  For FPU_R, the value
-            loaded into the FPU is checked at the time it is read from
-            memory (see synth_fpu_mem_check_actions).  */
-         case FPU_R: case FPU_W:
-            vg_assert(u_in->tag2 == TempReg);
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* For FPU insns not referencing memory, just copy thru. */
-         case FPU: 
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         default:
-            VG_(ppUInstr)(0, u_in);
-            VG_(panic)( "vg_instrument: unhandled case");
-
-      } /* end of switch (u_in->opcode) */
-
-   } /* end of for loop */
-
-   VG_(freeCodeBlock)(cb_in);
-   return cb;
-}
-
-/*------------------------------------------------------------*/
-/*--- Clean up mem check instrumentation.                  ---*/
-/*------------------------------------------------------------*/
-
-#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
-#define VGC_UNDEF ((UChar)100)
-#define VGC_VALUE ((UChar)101)
-
-#define NOP_no_msg(uu)                                         \
-   do { uu->opcode = NOP; } while (False)
-
-#define NOP_tag1_op(uu)                                        \
-   do { uu->opcode = NOP;                                      \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: delete %s due to defd arg\n",   \
-                       i, VG_(nameOfTagOp(u->val3)));          \
-   } while (False)
-
-#define SETV_tag1_op(uu,newsz)                                 \
-   do { uu->opcode = SETV;                                     \
-        uu->size = newsz;                                      \
-        uu->tag2 = uu->tag3 = NoValue;                         \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: convert %s to SETV%d "          \
-                       "due to defd arg\n",                    \
-                       i, VG_(nameOfTagOp(u->val3)), newsz);   \
-   } while (False)
-
-
-
-/* Run backwards and delete SETVs on shadow temps for which the next
-   action is a write.  Needs an env saying whether or not the next
-   action is a write.  The supplied UCodeBlock is destructively
-   modified.
-*/
-static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
-{
-   Bool*   next_is_write;
-   Int     i, j, k, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
-
-   for (i = 0; i < n_temps; i++) next_is_write[i] = True;
-
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      /* If we're not checking address V bits, there will be a lot of
-         GETVs, TAG1s and TAG2s calculating values which are never
-         used.  These first three cases get rid of them. */
+      u->regs_live_after = rregs_live;
 
-      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete GETV\n", i);
-      } else
+      k = VG_(getRegUsage)(u, RealReg, regUse);
 
-      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) 
-                            && next_is_write[u->val1]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG1\n", i);
-      } else
-
-      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG2\n", i);
-      } else
-
-      /* We do the rest of these regardless of whether or not
-         addresses are V-checked. */
-
-      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) 
-                           && next_is_write[u->val2]) {
-         /* This MOV is pointless because the target is dead at this
-            point.  Delete it. */
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete MOV\n", i);
-      } else
-
-      if (u->opcode == SETV) {
-         if (u->tag1 == TempReg) {
-            vg_assert(VGC_IS_SHADOW(u->val1));
-            if (next_is_write[u->val1]) {
-               /* This write is pointless, so annul it. */
-               u->opcode = NOP;
-               u->size = 0;
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete SETV\n", i);
-            } else {
-               /* This write has a purpose; don't annul it, but do
-                  notice that we did it. */
-               next_is_write[u->val1] = True;
-            }
-              
-         }
-
-      } else {
-         /* Find out what this insn does to the temps. */
-         k = getTempUsage(u, &tempUse[0]);
-         vg_assert(k <= 3);
-         for (j = k-1; j >= 0; j--) {
-            next_is_write[ tempUse[j].tempNo ]
-                         = tempUse[j].isWrite;
-         }
-      }
-
-   }
-
-   VG_(jitfree)(next_is_write);
-}
-
-
-/* Run forwards, propagating and using the is-completely-defined
-   property.  This removes a lot of redundant tag-munging code.
-   Unfortunately it requires intimate knowledge of how each uinstr and
-   tagop modifies its arguments.  This duplicates knowledge of uinstr
-   tempreg uses embodied in getTempUsage(), which is unfortunate. 
-   The supplied UCodeBlock* is modified in-place.
-
-   For each value temp, def[] should hold VGC_VALUE.
-
-   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
-   definitely known to be fully defined at that size.  In all other
-   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
-   undefined.  In cases of doubt, VGC_UNDEF is always safe.
-*/
-static void vg_propagate_definedness ( UCodeBlock* cb )
-{
-   UChar*  def;
-   Int     i, j, k, t, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
-   for (i = 0; i < n_temps; i++) 
-      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
-
-   /* Run forwards, detecting and using the all-defined property. */
-
-   for (i = 0; i < cb->used; i++) {
-      u = &cb->instrs[i];
-      switch (u->opcode) {
-
-      /* Tag-handling uinstrs. */
-
-         /* Deal with these quickly. */
-         case NOP:
-         case INCEIP:
-            break;
-
-         /* Make a tag defined. */
-         case SETV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            def[u->val1] = u->size;
-            break;
-
-         /* Check definedness of a tag. */
-         case TESTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) { 
-               vg_assert(def[u->val1] == u->size); 
-               NOP_no_msg(u);
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete TESTV on defd arg\n", i);
-            }
-            break;
-
-         /* Applies to both values and tags.  Propagate Definedness
-            property through copies.  Note that this isn't optional;
-            we *have* to do this to keep def[] correct. */
-         case MOV:
-            vg_assert(u->tag2 == TempReg);
-            if (u->tag1 == TempReg) {
-               if (VGC_IS_SHADOW(u->val1)) {
-                  vg_assert(VGC_IS_SHADOW(u->val2));
-                  def[u->val2] = def[u->val1];
-               }
-            }
-            break;
-
-         case PUTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(PUTV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into PUTV\n", i);
-            }
-            break;
-
-         case STOREV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(STOREV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into STandV\n", i);
-            }
-            break;
-
-         /* Nothing interesting we can do with this, I think. */
-         case PUTVF:
-            break;
-
-         /* Tag handling operations. */
-         case TAG2:
-            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-            vg_assert(u->tag3 == Lit16);
-            /* Ultra-paranoid "type" checking. */
-            switch (u->val3) {
-               case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
-               case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
-               case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
-                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
-                  break;
-               default:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  break;
-            }
-            switch (u->val3) {
-               Int sz;
-               case VgT_UifU4: 
-                  sz = 4; goto do_UifU;
-               case VgT_UifU2: 
-                  sz = 2; goto do_UifU;
-               case VgT_UifU1:
-                  sz = 1; goto do_UifU;
-               case VgT_UifU0:
-                  sz = 0; goto do_UifU;
-               do_UifU:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-                  if (def[u->val1] <= 4) {
-                     /* UifU.  The first arg is defined, so result is
-                        simply second arg.  Delete this operation. */
-                     vg_assert(def[u->val1] == sz);
-                     NOP_no_msg(u);
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: delete UifU%d due to defd arg1\n", 
-                           i, sz);
-                  }
-                  else 
-                  if (def[u->val2] <= 4) {
-                     /* UifU.  The second arg is defined, so result is
-                        simply first arg.  Copy to second. */
-                     vg_assert(def[u->val2] == sz);
-                     u->opcode = MOV; 
-                     u->size = 4;
-                     u->tag3 = NoValue;
-                     def[u->val2] = def[u->val1];
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: change UifU%d to MOV due to defd"
-                           " arg2\n", 
-                           i, sz);
-                  }
-                  break;
-               case VgT_ImproveAND4_TQ:
-                  sz = 4; goto do_ImproveAND;
-               case VgT_ImproveAND1_TQ:
-                  sz = 1; goto do_ImproveAND;
-               do_ImproveAND:
-                  /* Implements Q = T OR Q.  So if Q is entirely defined,
-                     ie all 0s, we get MOV T, Q. */
-		  if (def[u->val2] <= 4) {
-                     vg_assert(def[u->val2] == sz);
-                     u->size = 4; /* Regardless of sz */
-                     u->opcode = MOV;
-                     u->tag3 = NoValue;
-                     def[u->val2] = VGC_UNDEF;
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                            "at %d: change ImproveAND%d_TQ to MOV due "
-                            "to defd arg2\n", 
-                            i, sz);
-                  }
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            break;
-
-         case TAG1:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] > 4) break;
-            /* We now know that the arg to the op is entirely defined.
-               If the op changes the size of the arg, we must replace
-               it with a SETV at the new size.  If it doesn't change
-               the size, we can delete it completely. */
-            switch (u->val3) {
-               /* Maintain the same size ... */
-               case VgT_Left4: 
-                  vg_assert(def[u->val1] == 4);
-                  NOP_tag1_op(u);
-                  break;
-               case VgT_PCast11: 
-                  vg_assert(def[u->val1] == 1);
-                  NOP_tag1_op(u);
-                  break;
-               /* Change size ... */
-               case VgT_PCast40: 
-                  vg_assert(def[u->val1] == 4);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast14: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,4);
-                  def[u->val1] = 4;
-                  break;
-               case VgT_PCast12: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               case VgT_PCast10: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast02: 
-                  vg_assert(def[u->val1] == 0);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            if (VG_(disassemble)) 
-               VG_(printf)(
-                  "at %d: delete TAG1 %s due to defd arg\n",
-                  i, VG_(nameOfTagOp(u->val3)));
-            break;
-
-         default:
-         unhandled:
-            /* We don't know how to handle this uinstr.  Be safe, and 
-               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
-            k = getTempUsage(u, &tempUse[0]);
-            vg_assert(k <= 3);
-            for (j = 0; j < k; j++) {
-               t = tempUse[j].tempNo;
-               vg_assert(t >= 0 && t < n_temps);
-               if (!tempUse[j].isWrite) {
-                  /* t is read; ignore it. */
-                  if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
-                     VG_(printf)("ignoring def %d at %s %s\n", 
-                                 def[t], 
-                                 VG_(nameUOpcode)(True, u->opcode),
-                                 (u->opcode == TAG1 || u->opcode == TAG2)
-                                    ? VG_(nameOfTagOp)(u->val3) 
-                                    : (Char*)"");
-               } else {
-                  /* t is written; better nullify it. */
-                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
-               }
-            }
+      /* For each reg usage ... bwds in program order.  Variable is live
+         before this UInstr if it is read by this UInstr.
+         Note that regUse[j].num holds the Intel reg number, so we must
+         convert it to our rank number.  */
+      for (j = k-1; j >= 0; j--) {
+         SET_RREG_LIVENESS ( VG_(realRegNumToRank)(regUse[j].num),
+                             rregs_live,
+                             !regUse[j].isWrite );
       }
    }
-
-   VG_(jitfree)(def);
 }
 
-
-/* Top level post-instrumentation cleanup function. */
-static void vg_cleanup ( UCodeBlock* cb )
-{
-   vg_propagate_definedness ( cb );
-   vg_delete_redundant_SETVs ( cb );
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Main entry point for the JITter.                     ---*/
 /*------------------------------------------------------------*/
@@ -3068,13 +1964,14 @@
    this call is being done for debugging purposes, in which case (a)
    throw away the translation once it is made, and (b) produce a load
    of debugging output. 
+
+   'tst' is the identity of the thread needing this block.
 */
-void VG_(translate) ( ThreadState* tst, 
-                         /* Identity of thread needing this block */
-                      Addr  orig_addr,
-                      UInt* orig_size,
-                      Addr* trans_addr,
-                      UInt* trans_size )
+void VG_(translate) ( /*IN*/  ThreadState* tst, 
+		      /*IN*/  Addr  orig_addr,  
+                      /*OUT*/ UInt* orig_size,
+                      /*OUT*/ Addr* trans_addr, 
+                      /*OUT*/ UInt* trans_size )
 {
    Int         n_disassembled_bytes, final_code_size;
    Bool        debugging_translation;
@@ -3085,109 +1982,82 @@
    debugging_translation
       = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
 
-   dis = True;
-   dis = debugging_translation;
+   if (!debugging_translation)
+      VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
 
-   /* Check if we're being asked to jump to a silly address, and if so
-      record an error message before potentially crashing the entire
-      system. */
-   if (VG_(clo_instrument) && !debugging_translation && !dis) {
-      Addr bad_addr;
-      Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
-      if (!ok) {
-         VG_(record_jump_error)(tst, bad_addr);
-      }
-   }
-
-   /* if (VG_(overall_in_count) >= 4800) dis=True; */
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-   if (0 || dis 
-       || (VG_(overall_in_count) > 0 &&
-           (VG_(overall_in_count) % 1000 == 0))) {
-      if (0&& (VG_(clo_verbosity) > 1 || dis))
-         VG_(message)(Vg_UserMsg,
-              "trans# %d, bb# %lu, in %d, out %d",
-              VG_(overall_in_count), 
-              VG_(bbs_done),
-              VG_(overall_in_osize), VG_(overall_in_tsize),
-              orig_addr );
-   }
    cb = VG_(allocCodeBlock)();
 
+   /* If doing any code printing, print a basic block start marker */
+   if (VG_(clo_trace_codegen)) {
+      Char fnname[64] = "";
+      VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
+      VG_(printf)(
+              "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %lu ====\n\n",
+              VG_(overall_in_count), fnname, orig_addr, 
+              VG_(overall_in_osize), VG_(overall_in_tsize),
+              VG_(bbs_done));
+   }
+
+   /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
+#  define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
+      ( debugging_translation || (VG_(clo_trace_codegen) & (1 << (n-1))) )
+
    /* Disassemble this basic block into cb. */
-   /* VGP_PUSHCC(VgpToUCode); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
+   VGP_PUSHCC(VgpToUCode);
    n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
-   /* VGP_POPCC; */
-   /* dis=True; */
-   /* if (0&& VG_(translations_done) < 617)  */
-   /*    dis=False; */
+   VGP_POPCC(VgpToUCode);
+
    /* Try and improve the code a bit. */
    if (VG_(clo_optimise)) {
-      /* VGP_PUSHCC(VgpImprove); */
+      VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
+      VGP_PUSHCC(VgpImprove);
       vg_improve ( cb );
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Improved code:" );
-      /* VGP_POPCC; */
-   }
-   /* dis=False; */
-   /* Add instrumentation code. */
-   if (VG_(clo_instrument)) {
-      /* VGP_PUSHCC(VgpInstrument); */
-      cb = vg_instrument(cb);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
-      if (VG_(clo_cleanup)) {
-         /* VGP_PUSHCC(VgpCleanup); */
-         vg_cleanup(cb);
-         /* VGP_POPCC; */
-         if (VG_(disassemble)) 
-            VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
-      }
+      VGP_POPCC(VgpImprove);
    }
 
-   //VG_(disassemble) = True;
+   /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
+      SK_(instrument) looks at it.) */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
+   VGP_PUSHCC(VgpInstrument);
+   cb = SK_(instrument) ( cb, orig_addr );
+   if (VG_(print_codegen))
+      VG_(ppUCodeBlock) ( cb, "Instrumented UCode:" );
+   VG_(saneUCodeBlock)( cb );
+   VGP_POPCC(VgpInstrument);
 
-   /* Add cache simulation code. */
-   if (VG_(clo_cachesim)) {
-      /* VGP_PUSHCC(VgpCacheInstrument); */
-      cb = VG_(cachesim_instrument)(cb, orig_addr);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
-   }
-   
-   //VG_(disassemble) = False;
-   
    /* Allocate registers. */
-   /* VGP_PUSHCC(VgpRegAlloc); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
+   VGP_PUSHCC(VgpRegAlloc);
    cb = vg_do_register_allocation ( cb );
-   /* VGP_POPCC; */
-   /* dis=False; */
-   /* 
-   if (VG_(disassemble))
-      VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
-   */
+   VGP_POPCC(VgpRegAlloc);
 
-   /* VGP_PUSHCC(VgpFromUcode); */
-   /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
-      and so must be VG_(jitfree)'d. */
+   /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
+    * anything;  results can be seen when emitting final code). */
+   VGP_PUSHCC(VgpLiveness);
+   vg_realreg_liveness_analysis ( cb );
+   VGP_POPCC(VgpLiveness);
+
+   /* Emit final code */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
+
+   VGP_PUSHCC(VgpFromUcode);
    final_code = VG_(emit_code)(cb, &final_code_size );
-   /* VGP_POPCC; */
+   VGP_POPCC(VgpFromUcode);
    VG_(freeCodeBlock)(cb);
 
+#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
+
    if (debugging_translation) {
       /* Only done for debugging -- throw away final result. */
-      VG_(jitfree)(final_code);
+      VG_(arena_free)(VG_AR_JITTER, final_code);
    } else {
       /* Doing it for real -- return values to caller. */
-      //VG_(printf)("%d %d\n", n_disassembled_bytes, final_code_size);
       *orig_size = n_disassembled_bytes;
       *trans_addr = (Addr)final_code;
       *trans_size = final_code_size;
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpTranslate);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index a6e15b3..09e8fa2 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 /* #define DEBUG_TRANSTAB */
 
@@ -319,14 +318,13 @@
          vg_tt_used, vg_tc_used / 1000
       );
 
-   /* Reconstruct the SMC detection structures. */
 #  ifdef DEBUG_TRANSTAB
    for (i = 0; i < VG_TT_SIZE; i++)
       vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
 #  endif
    VG_(sanity_check_tc_tt)();
 
-   VGP_POPCC;
+   VGP_POPCC(VgpDoLRU);
 }
 
 
@@ -460,7 +458,7 @@
    if (tte == NULL) {
       /* We didn't find it.  vg_run_innerloop will have to request a
          translation. */
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return (Addr)0;
    } else {
       /* Found it.  Put the search result into the fast cache now.
@@ -469,7 +467,7 @@
       VG_(tt_fast)[cno] = (Addr)tte;
       VG_(tt_fast_misses)++;
       tte->mru_epoch = VG_(current_epoch);
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return tte->trans_addr;
    }
 }
@@ -498,8 +496,11 @@
       o_end = o_start + vg_tt[i].orig_size - 1;
       if (o_end < i_start || o_start > i_end)
          continue;
-      if (VG_(clo_cachesim))
-         VG_(cachesim_notify_discard)( & vg_tt[i] );
+
+      if (VG_(needs).basic_block_discards)
+         SK_(discard_basic_block_info)( vg_tt[i].orig_addr, 
+                                         vg_tt[i].orig_size );
+
       vg_tt[i].orig_addr = VG_TTE_DELETED;
       VG_(this_epoch_out_count) ++;
       VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
diff --git a/coregrind/vg_unsafe.h b/coregrind/vg_unsafe.h
index 0f72646..0862e0e 100644
--- a/coregrind/vg_unsafe.h
+++ b/coregrind/vg_unsafe.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/coregrind/vg_valgrinq_dummy.c b/coregrind/vg_valgrinq_dummy.c
index a0b1441..332085a 100644
--- a/coregrind/vg_valgrinq_dummy.c
+++ b/coregrind/vg_valgrinq_dummy.c
@@ -26,11 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* For the rationale behind this file, look at
-   VG_(mash_LD_PRELOAD_string) in vg_main.c. */
+   VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) in vg_main.c. */
 
 /* Remember not to use a variable of this name in any program you want
    to debug :-) */
diff --git a/demangle/cp-demangle.c b/demangle/cp-demangle.c
index 5cf99c8..8d91d29 100644
--- a/demangle/cp-demangle.c
+++ b/demangle/cp-demangle.c
@@ -46,9 +46,9 @@
 #include "demangle.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation,
@@ -1406,7 +1406,7 @@
   }
 
   if (base == 36) {
-     *value = VG_(atoll36) (dyn_string_buf (number));
+     *value = VG_(atoll36) (36, dyn_string_buf (number));
   } else {
      *value = VG_(atoll) (dyn_string_buf (number));
   }
diff --git a/demangle/cplus-dem.c b/demangle/cplus-dem.c
index 56c3261..959dbd3 100644
--- a/demangle/cplus-dem.c
+++ b/demangle/cplus-dem.c
@@ -70,17 +70,18 @@
 #endif
 
 #ifndef STANDALONE
-#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr)
-#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr)
-#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size)
-#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size)
+#define xstrdup(ptr)        VG_(arena_strdup) (VG_AR_DEMANGLE, ptr)
+#define free(ptr)           VG_(arena_free)   (VG_AR_DEMANGLE, ptr)
+#define xmalloc(size)       VG_(arena_malloc) (VG_AR_DEMANGLE, size)
+#define xrealloc(ptr, size) VG_(arena_realloc)(VG_AR_DEMANGLE, ptr, \
+                                               /*align*/4, size)
 #define abort() vg_assert(0)
 #undef strstr
-#define strstr VG_(strstr)
+#define strstr  VG_(strstr)
 #define sprintf VG_(sprintf)
 #define strncpy VG_(strncpy)
 #define strncat VG_(strncat)
-#define strchr VG_(strchr)
+#define strchr  VG_(strchr)
 #define strpbrk VG_(strpbrk)
 #endif
 
diff --git a/demangle/dyn-string.c b/demangle/dyn-string.c
index aaa7e36..d6130a1 100644
--- a/demangle/dyn-string.c
+++ b/demangle/dyn-string.c
@@ -36,9 +36,9 @@
 #include "dyn-string.h"
 
 #ifndef STANDALONE
-#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s)
-#define free(p) VG_(free)(VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
+#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, /*alignment*/4, s)
 #endif
 
 /* If this file is being compiled for inclusion in the C++ runtime
diff --git a/docs/manual.html b/docs/manual.html
index b715ee3..95fe840 100644
--- a/docs/manual.html
+++ b/docs/manual.html
@@ -345,7 +345,7 @@
 </pre>
 
 <p>Note that Valgrind also reads options from the environment variable
-<code>$VALGRIND</code>, and processes them before the command-line
+<code>$VALGRIND_OPTS</code>, and processes them before the command-line
 options.
 
 <p>Valgrind's default settings succeed in giving reasonable behaviour
@@ -838,8 +838,8 @@
   <li>The contents of malloc'd blocks, before you write something
       there.  In C++, the new operator is a wrapper round malloc, so
       if you create an object with new, its fields will be
-      uninitialised until you fill them in, which is only Right and
-      Proper.</li>
+      uninitialised until you (or the constructor) fill them in, which
+      is only Right and Proper.</li>
 </ul>
 
 
@@ -1066,16 +1066,16 @@
       <p>
 
   <li>The "immediate location" specification.  For Value and Addr
-      errors, is either the name of the function in which the error
-      occurred, or, failing that, the full path the the .so file
-      containing the error location.  For Param errors, is the name of
-      the offending system call parameter.  For Free errors, is the
-      name of the function doing the freeing (eg, <code>free</code>,
-      <code>__builtin_vec_delete</code>, etc)</li><br>
+      errors, it is either the name of the function in which the error
+      occurred, or, failing that, the full path of the .so file or
+      executable containing the error location.  For Param errors, it
+      is the name of the offending system call parameter.  For Free
+      errors, it is the name of the function doing the freeing (eg,
+      <code>free</code>, <code>__builtin_vec_delete</code>, etc)</li><br>
       <p>
 
   <li>The caller of the above "immediate location".  Again, either a
-      function or shared-object name.</li><br>
+      function or shared-object/executable name.</li><br>
       <p>
 
   <li>Optionally, one or two extra calling-function or object names,
@@ -1083,8 +1083,8 @@
 </ul>
 
 <p>
-Locations may be either names of shared objects or wildcards matching
-function names.  They begin <code>obj:</code> and <code>fun:</code>
+Locations may be either names of shared objects/executables or wildcards
+matching function names.  They begin <code>obj:</code> and <code>fun:</code>
 respectively.  Function and object names to match against may use the 
 wildcard characters <code>*</code> and <code>?</code>.
 
@@ -1617,11 +1617,11 @@
 
   <li>If the new size is smaller, the dropped-off section is marked as
       unaddressible.  You may only pass to realloc a pointer
-      previously issued to you by malloc/calloc/new/realloc.</li><br>
+      previously issued to you by malloc/calloc/realloc.</li><br>
       <p>
 
   <li>free/delete: you may only pass to free a pointer previously
-      issued to you by malloc/calloc/new/realloc, or the value
+      issued to you by malloc/calloc/realloc, or the value
       NULL. Otherwise, Valgrind complains.  If the pointer is indeed
       valid, Valgrind marks the entire area it points at as
       unaddressible, and places the block in the freed-blocks-queue.
@@ -2058,7 +2058,9 @@
   <li>Run your program with <code>cachegrind</code> in front of the
       normal command line invocation.  When the program finishes,
       Valgrind will print summary cache statistics. It also collects
-      line-by-line information in a file <code>cachegrind.out</code>.
+      line-by-line information in a file
+      <code>cachegrind.out.<i>pid</i></code>, where <code><i>pid</i></code>
+      is the program's process id.
       <p>
       This step should be done every time you want to collect
       information about a new program, a changed program, or about the
@@ -2197,15 +2199,17 @@
 
 As well as printing summary information, Cachegrind also writes
 line-by-line cache profiling information to a file named
-<code>cachegrind.out</code>.  This file is human-readable, but is best
-interpreted by the accompanying program <code>vg_annotate</code>,
+<code>cachegrind.out.<i>pid</i></code>.  This file is human-readable, but is
+best interpreted by the accompanying program <code>vg_annotate</code>,
 described in the next section.
 <p>
-Things to note about the <code>cachegrind.out</code> file:
+Things to note about the <code>cachegrind.out.<i>pid</i></code> file:
 <ul>
   <li>It is written every time <code>valgrind --cachesim=yes</code> or
       <code>cachegrind</code> is run, and will overwrite any existing
-      <code>cachegrind.out</code> in the current directory.</li>
+      <code>cachegrind.out.<i>pid</i></code> in the current directory (but
+      that won't happen very often because it takes some time for process ids
+      to be recycled).</li>
   <p>
   <li>It can be huge: <code>ls -l</code> generates a file of about
       350KB.  Browsing a few files and web pages with a Konqueror
@@ -2213,6 +2217,13 @@
       of around 15 MB.</li>
 </ul>
 
+Note that older versions of Cachegrind used a log file named
+<code>cachegrind.out</code> (i.e. no <code><i>.pid</i></code> suffix).
+The suffix serves two purposes.  Firstly, it means you don't have to rename old
+log files that you don't want to overwrite.  Secondly, and more importantly,
+it allows correct profiling with the <code>--trace-children=yes</code> option
+of programs that spawn child processes.
+
 <a name="profileflags"></a>
 <h3>7.5&nbsp; Cachegrind options</h3>
 Cachegrind accepts all the options that Valgrind does, although some of them
@@ -2245,9 +2256,13 @@
 window to be at least 120-characters wide if possible, as the output
 lines can be quite long.
 <p>
-To get a function-by-function summary, run <code>vg_annotate</code> in
-directory containing a <code>cachegrind.out</code> file.  The output
-looks like this:
+To get a function-by-function summary, run <code>vg_annotate
+--<i>pid</i></code> in a directory containing a
+<code>cachegrind.out.<i>pid</i></code> file.  The <code>--<i>pid</i></code>
+is required so that <code>vg_annotate</code> knows which log file to use when
+several are present.
+<p>
+The output looks like this:
 
 <pre>
 --------------------------------------------------------------------------------
@@ -2468,8 +2483,9 @@
 specific enough.
 
 Beware that vg_annotate can take some time to digest large
-<code>cachegrind.out</code> files, eg. 30 seconds or more.  Also beware that
-auto-annotation can produce a lot of output if your program is large!
+<code>cachegrind.out.<i>pid</i></code> files, e.g. 30 seconds or more.  Also
+beware that auto-annotation can produce a lot of output if your program is
+large!
 
 
 <h3>7.7&nbsp; Annotating assembler programs</h3>
@@ -2492,13 +2508,18 @@
 
 <h3>7.8&nbsp; <code>vg_annotate</code> options</h3>
 <ul>
+  <li><code>--<i>pid</i></code></li><p>
+
+      Indicates which <code>cachegrind.out.<i>pid</i></code> file to read.
+      Not actually an option -- it is required.
+    
   <li><code>-h, --help</code></li><p>
   <li><code>-v, --version</code><p>
 
       Help and version, as usual.</li>
 
   <li><code>--sort=A,B,C</code> [default: order in 
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies the events upon which the sorting of the function-by-function
       entries will be based.  Useful if you want to concentrate on eg. I cache
       misses (<code>--sort=I1mr,I2mr</code>), or D cache misses
@@ -2506,10 +2527,10 @@
       (<code>--sort=D2mr,I2mr</code>).</li><p>
 
   <li><code>--show=A,B,C</code> [default: all, using order in
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies which events to show (and the column order). Default is to use
-      all present in the <code>cachegrind.out</code> file (and use the order in
-      the file).</li><p>
+      all present in the <code>cachegrind.out.<i>pid</i></code> file (and use
+      the order in the file).</li><p>
 
   <li><code>--threshold=X</code> [default: 99%] <p>
       Sets the threshold for the function-by-function summary.  Functions are
@@ -2547,17 +2568,18 @@
 There are a couple of situations in which vg_annotate issues warnings.
 
 <ul>
-  <li>If a source file is more recent than the <code>cachegrind.out</code>
-      file.  This is because the information in <code>cachegrind.out</code> is
-      only recorded with line numbers, so if the line numbers change at all in
-      the source (eg. lines added, deleted, swapped), any annotations will be 
+  <li>If a source file is more recent than the
+      <code>cachegrind.out.<i>pid</i></code> file.  This is because the
+      information in <code>cachegrind.out.<i>pid</i></code> is only recorded
+      with line numbers, so if the line numbers change at all in the source
+      (eg.  lines added, deleted, swapped), any annotations will be
       incorrect.<p>
 
   <li>If information is recorded about line numbers past the end of a file.
       This can be caused by the above problem, ie. shortening the source file
-      while using an old <code>cachegrind.out</code> file.  If this happens,
-      the figures for the bogus lines are printed anyway (clearly marked as
-      bogus) in case they are important.</li><p>
+      while using an old <code>cachegrind.out.<i>pid</i></code> file.  If this
+      happens, the figures for the bogus lines are printed anyway (clearly
+      marked as bogus) in case they are important.</li><p>
 </ul>
 
 
@@ -2677,6 +2699,13 @@
       <blockquote><code>btsl %eax, %edx</code></blockquote>
 
       This should only happen rarely.
+      </li><p>
+
+  <li>FPU instructions with data sizes of 28 and 108 bytes (e.g.
+      <code>fsave</code>) are treated as though they only access 16 bytes.
+      These instructions seem to be rare so hopefully this won't affect
+      accuracy much.
+      </li><p>
 </ul>
 
 Another thing worth nothing is that results are very sensitive.  Changing the
diff --git a/glibc-2.2.supp b/glibc-2.2.supp
index 26dd302..a4e05bb 100644
--- a/glibc-2.2.supp
+++ b/glibc-2.2.supp
@@ -68,8 +68,24 @@
    fun:__libc_freeres
 }
 
-#-------- Threading bugs?
+#-------- Data races
+#{
+#   _dl_lookup_symbol/fixup/_dl_runtime_resolve
+#   Eraser
+#   fun:_dl_lookup_symbol
+#   fun:fixup
+#   fun:_dl_runtime_resolve
+#}
+#
+#{
+#   _dl_lookup_versioned_symbol/fixup/_dl_runtime_resolve
+#   Eraser
+#   fun:_dl_lookup_versioned_symbol
+#   fun:fixup
+#   fun:_dl_runtime_resolve
+#}
 
+#-------- Threading bugs?
 {
    pthread_error/__pthread_mutex_destroy/__closedir
    PThread
diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am
index 60553dd..96911ed 100644
--- a/helgrind/Makefile.am
+++ b/helgrind/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
new file mode 100644
index 0000000..43e46bc
--- /dev/null
+++ b/helgrind/hg_main.c
@@ -0,0 +1,1415 @@
+/*--------------------------------------------------------------------*/
+/*--- The Eraser skin: checking for data races in threaded         ---*/
+/*--- programs.                                                    ---*/
+/*---                                                    hg_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+/* Count of Eraser warnings issued so far (not referenced elsewhere in
+   this chunk -- presumably read/reported further down the file). */
+static UInt n_eraser_warnings = 0;
+
+
+/*------------------------------------------------------------*/
+/*--- Debug guff                                           ---*/
+/*------------------------------------------------------------*/
+
+#define DEBUG_LOCK_TABLE    1   /* Print lock table at end */
+
+#define DEBUG_MAKE_ACCESSES 0   /* Print make_access() calls */
+#define DEBUG_LOCKS         0   /* Print lock()/unlock() calls and locksets */
+#define DEBUG_NEW_LOCKSETS  0   /* Print new locksets when created */
+#define DEBUG_ACCESSES      0   /* Print reads, writes */
+#define DEBUG_MEM_LOCKSET_CHANGES 0
+                                /* Print when an address's lockset
+                                   changes; only useful with
+                                   DEBUG_ACCESSES */
+
+#define DEBUG_VIRGIN_READS  0   /* Dump around address on VIRGIN reads */
+
+/* heavyweight LockSet sanity checking:
+   0 == never
+   1 == after important ops
+   2 == As 1 and also after pthread_mutex_* ops (excessively slow)
+ */
+#define LOCKSET_SANITY 0
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+// PPP: work out if I want this
+
+#define PROF_EVENT(x)
+#if 0
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+static UInt event_ctr[N_PROF_EVENTS];
+
+void VGE_(done_prof_mem) ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0)
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False);
+
+#else
+
+//static void init_prof_mem ( void ) { }
+//       void VG_(done_prof_mem) ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif /* VG_PROFILE_MEMORY */
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   [PPP: snip event numbers...]
+*/
+#endif /* 0 */
+
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+/* How a region's shadow words should be (re)initialised. */
+typedef enum 
+   { Vge_VirginInit, Vge_NonVirginInit, Vge_SegmentInit } 
+   VgeInitStatus;
+
+/* Should add up to 32 to fit in one word */
+#define OTHER_BITS      30
+#define STATE_BITS      2
+
+#define ESEC_MAP_WORDS  16384   /* Words per secondary map */
+
+/* This is for indicating that a memory block has been initialised but not
+ * really directly by a particular thread... (eg. text/data initialised
+ * automatically at startup).
+ * Must be different to virgin_word.other */
+#define TID_INDICATING_NONVIRGIN    1
+
+/* Number of entries must fit in STATE_BITS bits */
+typedef enum { Vge_Virgin, Vge_Excl, Vge_Shar, Vge_SharMod } pth_state;
+
+/* One shadow word per 32-bit word of client memory.  'state' is the
+   Eraser state machine state; 'other' holds a thread id while in the
+   Excl state (see init_nonvirgin_sword) or a lockset-table index in
+   the shared states (see the OTHER_BITS overflow check in intersect). */
+typedef
+   struct {
+      UInt other:OTHER_BITS;
+      UInt state:STATE_BITS;
+   } shadow_word;
+
+typedef
+   struct {
+      shadow_word swords[ESEC_MAP_WORDS];
+   }
+   ESecMap;
+
+/* Two-level shadow map: bits 31..16 of an address select the primary
+   entry; untouched ranges all share the distinguished all-virgin
+   secondary map below. */
+static ESecMap* primary_map[ 65536 ];
+static ESecMap  distinguished_secondary_map;
+
+static shadow_word virgin_sword = { 0, Vge_Virgin };
+
+#define VGE_IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+/* Lazily replace the distinguished secondary with a freshly allocated
+   one the first time its address range is actually written. */
+#define ENSURE_MAPPABLE(addr,caller)                                  \
+   do {                                                               \
+      if (VGE_IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller);     \
+         /*VG_(printf)("new 2map because of %p\n", addr);*/           \
+      } \
+   } while(0)
+
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory tracking.               ---*/
+/*------------------------------------------------------------*/
+
+/*
+   All reads and writes are recorded in the memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like that for normal Valgrind, except each that everything
+   is done at word-level instead of byte-level, and each word has only
+   one word of shadow (instead of 36 bits).  
+
+   As for normal Valgrind there is a distinguished secondary map.  But we're
+   working at word-granularity, so it has 16k word entries instead of 64k byte
+   entries.  Lookup is done as follows:
+
+     bits 31..16:   primary map lookup
+     bits 15.. 2:   secondary map lookup
+     bits  1.. 0:   ignored
+*/
+
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map, marking all words as virgin. */
+
+/* Just a value that isn't a real pointer */
+#define SEC_MAP_ACCESS  (shadow_word*)0x99    
+
+
+/* 'caller' is passed through to the mmap helper for its diagnostics;
+   the unused attribute merely suppresses a warning. */
+static 
+ESecMap* alloc_secondary_map ( __attribute__ ((unused)) Char* caller )
+{
+   ESecMap* map;
+   UInt  i;
+   //PROF_EVENT(10); PPP
+
+   /* It just happens that a SecMap occupies exactly 18 pages --
+      although this isn't important, so the following assert is
+      spurious. (SSS: not true for ESecMaps -- they're 16 pages) */
+   vg_assert(0 == (sizeof(ESecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(ESecMap), caller );
+
+   /* Start every word in the Virgin state. */
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      map->swords[i] = virgin_sword;
+
+   return map;
+}
+
+
+/* Set a word.  The byte give by 'a' could be anywhere in the word -- the whole
+ * word gets set. */
+static __inline__ 
+void set_sword ( Addr a, shadow_word sword )
+{
+   ESecMap* sm;
+
+   //PROF_EVENT(23); PPP
+   ENSURE_MAPPABLE(a, "VGE_(set_sword)");
+
+   /* Use bits 31..16 for primary, 15..2 for secondary lookup */
+   sm     = primary_map[a >> 16];
+
+   /* BUG FIX: perform this check *before* the store.  The old code
+      asserted sm != &distinguished_secondary_map first (so the
+      informative message below was unreachable) and performed the
+      store before this check, i.e. it would have corrupted the
+      distinguished map before panicking. */
+   if (VGE_IS_DISTINGUISHED_SM(sm)) {
+      VG_(printf)("wrote to distinguished 2ndary map! 0x%x\n", a);
+      // XXX: may be legit, but I want to know when it happens --njn
+      VG_(panic)("wrote to distinguished 2ndary map!");
+   }
+
+   sm->swords[(a & 0xFFFC) >> 2] = sword;
+}
+
+
+/* Return the address of the shadow word covering 'a', or the sentinel
+   SEC_MAP_ACCESS if 'a' still maps to the distinguished (never
+   written) secondary map. */
+static __inline__ 
+shadow_word* get_sword_addr ( Addr a )
+{
+   /* Use bits 31..16 for primary, 15..2 for secondary lookup */
+   ESecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = (a & 0xFFFC) >> 2;
+
+   if (VGE_IS_DISTINGUISHED_SM(sm)) {
+      VG_(printf)("accessed distinguished 2ndary map! 0x%x\n", a);
+      // XXX: may be legit, but I want to know when it happens --njn
+      //VG_(panic)("accessed distinguished 2ndary map!");
+      return SEC_MAP_ACCESS;
+   }
+
+   //PROF_EVENT(21); PPP
+   return & (sm->swords[sm_off]);
+}
+
+
+// SSS: rename these so they're not so similar to memcheck, unless it's
+// appropriate of course
+
+/* Reset the word containing 'a' to the Virgin state. */
+static __inline__ 
+void init_virgin_sword(Addr a)
+{
+   set_sword(a, virgin_sword);
+}
+
+
+/* 'a' is guaranteed to be 4-byte aligned here (not that that's important,
+ * really) */
+static 
+void make_writable_aligned ( Addr a, UInt size )
+{
+   Addr a_past_end = a + size;
+
+   //PROF_EVENT(??)  PPP
+   vg_assert(IS_ALIGNED4_ADDR(a));
+
+   /* Mark every word in [a, a+size) virgin; 'size' is in bytes. */
+   for ( ; a < a_past_end; a += 4) {
+      set_sword(a, virgin_sword);
+   }
+}
+
+/* Mark the word containing 'a' as exclusively owned by the current
+   thread: state Excl, 'other' = the thread's id. */
+static __inline__ 
+void init_nonvirgin_sword(Addr a)
+{
+   shadow_word sword;
+
+   sword.other = VG_(get_current_tid_1_if_root)();
+   sword.state = Vge_Excl;
+   set_sword(a, sword);
+}
+
+
+/* In this case, we treat it for Eraser's sake like virgin (it hasn't
+ * been inited by a particular thread, it's just done automatically upon
+ * startup), but we mark its .other field specially so it doesn't look
+ * like an uninited read. */
+static __inline__ 
+void init_magically_inited_sword(Addr a)
+{
+   shadow_word sword;
+
+   vg_assert(1 == VG_(get_current_tid_1_if_root)());
+   sword.other = TID_INDICATING_NONVIRGIN;
+   sword.state = Vge_Virgin;
+   /* BUG FIX: store the word just constructed.  The old code stored
+      virgin_sword, discarding the TID_INDICATING_NONVIRGIN marking
+      that this function exists to apply. */
+   set_sword(a, sword);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Implementation of lock sets.                         ---*/
+/*------------------------------------------------------------*/
+
+#define M_LOCKSET_TABLE 1000
+
+#include <pthread.h>
+
+/* A lockset is a NULL-terminated singly-linked list of mutex pointers,
+   kept in strictly increasing address order (the invariant checked by
+   sanity_check_locksets). */
+typedef 
+   struct _LockSet {
+       pthread_mutex_t* mutex;
+       struct _LockSet* next;
+   } LockSet;
+
+
+/* Each one is an index into the lockset table. */
+static UInt thread_locks[VG_N_THREADS];
+
+/* # lockset table entries used. */
+static Int n_lockset_table = 1; 
+
+/* lockset_table[0] is always NULL, representing the empty lockset */
+static LockSet* lockset_table[M_LOCKSET_TABLE];
+
+
+/* True iff 'id' indexes an allocated lockset-table slot. */
+static __inline__
+Bool is_valid_lockset_id ( Int id )
+{
+   return id >= 0 && id < n_lockset_table;
+}
+
+
+/* Enter 'set' in the next free table slot and return its index;
+   panics if the table is full. */
+static
+Int allocate_LockSet(LockSet* set)
+{
+   if (n_lockset_table >= M_LOCKSET_TABLE) 
+      VG_(panic)("lockset table full -- increase M_LOCKSET_TABLE");
+   lockset_table[n_lockset_table] = set;
+   n_lockset_table++;
+#  if DEBUG_MEM_LOCKSET_CHANGES || DEBUG_NEW_LOCKSETS
+   VG_(printf)("allocate LOCKSET VECTOR %p to %d\n", set, n_lockset_table-1);
+#  endif
+   return n_lockset_table-1;
+}
+
+
+/* Print a lockset as "{ mutex-addresses }". */
+static 
+void pp_LockSet(LockSet* p)
+{
+   VG_(printf)("{ ");
+   while (p != NULL) {
+      VG_(printf)("%x ", p->mutex);
+      p = p->next;
+   }
+   VG_(printf)("}\n");
+}
+
+
+/* Debug aid: dump every entry of the lockset table. */
+static __attribute__((unused))
+void pp_all_LockSets ( void )
+{
+   Int i;
+   for (i = 0; i < n_lockset_table; i++) {
+      VG_(printf)("[%d] = ", i);
+      pp_LockSet(lockset_table[i]);
+   }
+}
+
+
+/* Free every node of list 'p'.  Callers in this chunk only free
+   vectors that were never entered in the lockset table. */
+static 
+void free_LockSet(LockSet *p)
+{
+   LockSet* q;
+   while (NULL != p) {
+      q = p;
+      p = p->next;
+#     if DEBUG_MEM_LOCKSET_CHANGES
+      /* BUG FIX: log before freeing -- the old code printed 'q' after
+         VG_(free)(q); even reading a freed pointer's value is
+         indeterminate behaviour in C. */
+      VG_(printf)("free'd   %x\n", q);
+#     endif
+      VG_(free)(q);
+   }
+}
+
+
+/* True iff lists 'a' and 'b' contain identical mutex sequences. */
+static 
+Bool structural_eq_LockSet(LockSet* a, LockSet* b)
+{
+   while (a && b) {
+      if (a->mutex != b->mutex) {
+         return False;
+      }
+      a = a->next;
+      b = b->next;
+   }
+   /* Equal only if both lists ran out together. */
+   return (NULL == a && NULL == b);
+}
+
+
+#if LOCKSET_SANITY 
+/* Check invariants:
+   - all locksets are unique
+   - each set is a linked list in strictly increasing order of mutex addr 
+*/
+static
+void sanity_check_locksets ( Char* caller )
+{
+   Int              i, j, badness;
+   LockSet*         v;
+   pthread_mutex_t* mx_prev;
+
+   /* 'badness' identifies which invariant failed (see report below). */
+   badness = 0;
+   i = j = -1;
+
+   //VG_(printf)("sanity %s\n", caller);
+   /* Check really simple things first */
+
+   if (n_lockset_table < 1 || n_lockset_table > M_LOCKSET_TABLE)
+      { badness = 1; goto baaad; }
+
+   if (lockset_table[0] != NULL)
+      { badness = 2; goto baaad; }
+
+   /* Every slot in use must be non-NULL ... */
+   for (i = 1; i < n_lockset_table; i++)
+      if (lockset_table[i] == NULL)
+         { badness = 3; goto baaad; }
+
+   /* ... and every unused slot must be NULL. */
+   for (i = n_lockset_table; i < M_LOCKSET_TABLE; i++)
+      if (lockset_table[i] != NULL)
+         { badness = 4; goto baaad; }
+
+   /* Check the sanity of each individual set. */
+   for (i = 1; i < n_lockset_table; i++) {
+      v = lockset_table[i];
+      mx_prev = (pthread_mutex_t*)0;
+      while (True) {
+         if (v == NULL) break;
+         if (mx_prev >= v->mutex) 
+            { badness = 5; goto baaad; }
+         mx_prev = v->mutex;
+         v = v->next;
+      }
+   }
+
+   /* Ensure the sets are unique, both structurally and in respect of
+      the address of their first nodes. */
+   for (i = 1; i < n_lockset_table; i++) {
+      for (j = i+1; j < n_lockset_table; j++) {
+         if (lockset_table[i] == lockset_table[j]) 
+            { badness = 6; goto baaad; }
+         if (structural_eq_LockSet(lockset_table[i], lockset_table[j])) 
+            { badness = 7; goto baaad; }
+      }
+   }
+   return;
+
+  baaad:
+   VG_(printf)("sanity_check_locksets: "
+               "i = %d, j = %d, badness = %d, caller = %s\n", 
+               i, j, badness, caller);
+   pp_all_LockSets();
+   VG_(panic)("sanity_check_locksets");
+}
+#endif /* LOCKSET_SANITY */
+
+
+/* Builds ia with mx removed.  mx should actually be in ia! 
+   (a checked assertion).  Resulting set should not already
+   exist in the table (unchecked).
+*/
+static 
+UInt remove ( UInt ia, pthread_mutex_t* mx )
+{
+   Int       found, res;
+   LockSet*  new_vector = NULL;
+   LockSet*  new_node;
+   LockSet** prev_ptr = &new_vector;
+   LockSet*  a = lockset_table[ia];
+   vg_assert(is_valid_lockset_id(ia));
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   VG_(printf)("Removing from %d mutex %p:\n", ia, mx);
+#  endif
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   /* NOTE(review): print_LockSet is not defined in this chunk --
+      possibly a stale name for pp_LockSet; verify before enabling
+      DEBUG_MEM_LOCKSET_CHANGES. */
+   print_LockSet(a);
+#  endif
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("remove-IN");
+#  endif
+
+   /* Copy the list, dropping every node whose mutex == mx. */
+   found = 0;
+   while (a) {
+      if (a->mutex != mx) {
+         new_node = VG_(malloc)(sizeof(LockSet));
+#        if DEBUG_MEM_LOCKSET_CHANGES
+         VG_(printf)("malloc'd %x\n", new_node);
+#        endif
+         new_node->mutex = a->mutex;
+         *prev_ptr = new_node;
+         prev_ptr = &((*prev_ptr)->next);
+      } else {
+         found++;
+      }
+      /* BUG FIX: advance unconditionally.  The old code advanced only
+         in the copy branch, so reaching the node with mutex == mx left
+         'a' unchanged and the loop spun forever. */
+      a = a->next;
+      *prev_ptr = NULL;
+   }
+   vg_assert(found == 1 /* sigh .. if the client is buggy */ || found == 0 );
+
+   /* Preserve uniqueness invariants in face of client buggyness */
+   if (found == 0) {
+      free_LockSet(new_vector);
+      return ia;
+   }
+
+   /* Add to the table. */
+   res = allocate_LockSet(new_vector);
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("remove-OUT");
+#  endif
+
+   return res;
+}
+
+
+/* Tricky: equivalent to (compare(insert(missing_elem, a), b)), but
+ * doesn't do the insertion.  Returns True if they match.
+ * Assumes both lists are sorted by mutex address (the table
+ * invariant), since it consumes them in lockstep.
+ */
+static Bool 
+weird_LockSet_equals(LockSet* a, LockSet* b, 
+                     pthread_mutex_t* missing_mutex)
+{
+   /* Idea is to try and match each element of b against either an
+      element of a, or missing_mutex. */
+   while (True) {
+      if (b == NULL) 
+         break;
+      /* deal with missing already being in a */
+      if (a && a->mutex == missing_mutex) 
+         a = a->next;
+      /* match current b element either against a or missing */
+      if (b->mutex == missing_mutex) {
+         b = b->next;
+         continue;
+      }
+      /* wasn't == missing, so have to match from a, or fail */
+      if (a && b->mutex == a->mutex) {
+         a = a->next;
+         b = b->next;
+         continue;
+      }
+      break;
+   }
+   /* Matched iff every element of b was accounted for. */
+   return (b==NULL ? True : False);
+}
+
+
+/* Builds the intersection, and then unbuilds it if it's already in the table.
+ */
+/* Returns the lockset-table index of intersect(ia, ib); a new table
+   entry is allocated only when the result is not already present. */
+static UInt intersect(UInt ia, UInt ib)
+{
+   Int       i;
+   LockSet*  a = lockset_table[ia];
+   LockSet*  b = lockset_table[ib];
+   LockSet*  new_vector = NULL;
+   LockSet*  new_node;
+   LockSet** prev_ptr = &new_vector;
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   VG_(printf)("Intersecting %d %d:\n", ia, ib);
+#  endif
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("intersect-IN");
+#  endif
+
+   /* Fast case -- when the two are the same */
+   if (ia == ib) {
+#     if DEBUG_MEM_LOCKSET_CHANGES
+      VG_(printf)("Fast case -- both the same: %u\n", ia);
+      /* NOTE(review): print_LockSet is not defined in this chunk --
+         possibly a stale name for pp_LockSet; verify before enabling
+         DEBUG_MEM_LOCKSET_CHANGES. */
+      print_LockSet(a);
+#     endif
+      return ia;
+   }
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   print_LockSet(a);
+   print_LockSet(b);
+#  endif
+
+   /* Build the intersection of the two lists; both are sorted by
+      mutex address, so this is a linear merge. */
+   while (a && b) {
+      if (a->mutex == b->mutex) {
+         new_node = VG_(malloc)(sizeof(LockSet));
+#        if DEBUG_MEM_LOCKSET_CHANGES
+         VG_(printf)("malloc'd %x\n", new_node);
+#        endif
+         new_node->mutex = a->mutex;
+         *prev_ptr = new_node;
+         prev_ptr = &((*prev_ptr)->next);
+         a = a->next;
+         b = b->next;
+      } else if (a->mutex < b->mutex) {
+         a = a->next;
+      } else if (a->mutex > b->mutex) {
+         b = b->next;
+      } else VG_(panic)("STOP PRESS: Laws of arithmetic broken"); /* unreachable */
+
+      *prev_ptr = NULL;
+   }
+
+   /* Now search for it in the table, adding it if not seen before */
+   for (i = 0; i < n_lockset_table; i++) {
+      if (structural_eq_LockSet(lockset_table[i], new_vector))
+         break;
+   }
+
+   if (i == n_lockset_table) {
+     i = allocate_LockSet(new_vector);
+   } else {
+     free_LockSet(new_vector);
+   }
+
+   /* Check we won't overflow the OTHER_BITS bits of sword->other */
+   vg_assert(i < (1 << OTHER_BITS));
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("intersect-OUT");
+#  endif
+
+   return i;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setting and checking permissions.                    ---*/
+/*------------------------------------------------------------*/
+
+/* (Re)initialise the shadow state for [a, a+len) according to
+   'status' (virgin / owned-by-current-thread / magically inited). */
+static
+void set_address_range_state ( Addr a, UInt len /* in bytes */, 
+                               VgeInitStatus status )
+{
+   Addr aligned_a, end, aligned_end;
+
+#  if DEBUG_MAKE_ACCESSES
+   VG_(printf)("make_access: 0x%x, %u, status=%u\n", a, len, status);
+#  endif
+   //PROF_EVENT(30); PPP
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: set address range state: large range %d",
+                   len);
+
+   VGP_PUSHCC(VgpSARP);
+
+   /* Memory block may not be aligned or a whole word multiple.  In neat cases,
+    * we have to init len/4 words (len is in bytes).  In nasty cases, it's
+    * len/4+1 words.  This works out which it is by aligning the block and
+    * seeing if the end byte is in the same word as it is for the unaligned
+    * block; if not, it's the awkward case. */
+   /* NOTE(review): '& 0xc' keeps only bits 2..3; it does not "zero the
+      bottom two bits" as the comment says.  The comparison below still
+      works -- end and aligned_end differ by (a & ~0xc), and only its
+      bits 0..1 can carry into bits 2..3 -- but '& ~3' would state the
+      intent directly.  Verify before changing. */
+   aligned_a   = a & 0xc;                       /* zero bottom two bits */
+   end         = a + len;
+   aligned_end = aligned_a + len;
+   if ((end & 0xc) != (aligned_end & 0xc)) {
+       end += 4;    /* len/4 + 1 case */
+   }
+
+   /* Do it ...  The loops start at the possibly-unaligned 'a' and step
+      by 4; set_sword always sets the whole containing word, and 'end'
+      was padded above when an extra word is touched. */
+   switch (status) {
+   case Vge_VirginInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_virgin_sword(a);
+      }
+      break;
+
+   case Vge_NonVirginInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_nonvirgin_sword(a);
+      }
+      break;
+
+   case Vge_SegmentInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_magically_inited_sword(a);
+      }
+      break;
+   
+   default:
+      VG_(printf)("init_status = %u\n", status);
+      VG_(panic)("Unexpected Vge_InitStatus");
+   }
+      
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with
+      __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSARP);
+}
+
+
+/* Mark range as magically initialised (Virgin state, 'other' =
+   TID_INDICATING_NONVIRGIN). */
+static void make_segment_readable ( Addr a, UInt len )
+{
+   //PROF_EVENT(??);    PPP
+   set_address_range_state ( a, len, Vge_SegmentInit );
+}
+
+/* Reset range to the Virgin state. */
+static void make_writable ( Addr a, UInt len )
+{
+   //PROF_EVENT(36);  PPP
+   set_address_range_state( a, len, Vge_VirginInit );
+}
+
+/* Mark range as Excl-owned by the current thread. */
+static void make_readable ( Addr a, UInt len )
+{
+   //PROF_EVENT(37);  PPP
+   set_address_range_state( a, len, Vge_NonVirginInit );
+}
+
+
+// SSS: change name
+/* Block-copy states (needed for implementing realloc()). */
+static void copy_address_range_state(Addr src, Addr dst, UInt len)
+{
+   UInt i;
+
+   //PROF_EVENT(40); PPP
+   /* One shadow word per 4 bytes; assumes src and dst are equally
+      aligned so corresponding words line up -- TODO confirm callers
+      guarantee this. */
+   for (i = 0; i < len; i += 4) {
+      shadow_word sword = *(get_sword_addr ( src+i ));
+      //PROF_EVENT(41);  PPP
+      set_sword ( dst+i, sword );
+   }
+}
+
+// SSS: put these somewhere better
+static void eraser_mem_read (Addr a, UInt data_size);
+static void eraser_mem_write(Addr a, UInt data_size);
+
+/* Hook: memory [base, base+size) is about to be read on the client's
+   behalf; run the Eraser read check over it. */
+static
+void eraser_pre_mem_read(CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   eraser_mem_read(base, size);
+}
+
+/* As above for a NUL-terminated string; note the terminating NUL
+   itself is not included in the checked range. */
+static
+void eraser_pre_mem_read_asciiz(CorePart part, ThreadState* tst,
+                                Char* s, UInt base )
+{
+   eraser_mem_read(base, VG_(strlen)((Char*)base));
+}
+
+/* Hook: memory [base, base+size) is about to be written. */
+static
+void eraser_pre_mem_write(CorePart part, ThreadState* tst,
+                          Char* s, UInt base, UInt size )
+{
+   eraser_mem_write(base, size);
+}
+
+
+
+/* New memory present at startup: mark it magically initialised. */
+static
+void eraser_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   make_segment_readable(a, len);
+}
+
+
+/* Fresh heap memory: already-initialised blocks become Excl-owned by
+   the current thread; uninitialised blocks start Virgin. */
+static
+void eraser_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   if (is_inited) {
+      make_readable(a, len);
+   } else {
+      make_writable(a, len);
+   }
+}
+
+/* Permission change: readable ranges become Excl-owned, write-only
+   ranges revert to Virgin; other combinations leave state untouched. */
+static
+void eraser_set_perms (Addr a, UInt len,
+                       Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   if      (rr) make_readable(a, len);
+   else if (ww) make_writable(a, len);
+   /* else do nothing */
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Initialise the memory audit system on program startup. ---*/
+/*--------------------------------------------------------------*/
+
+/* Make all shadow state Virgin: fill the distinguished secondary map
+   and point every primary entry at it. */
+static 
+void init_shadow_memory(void)
+{
+   Int i;
+
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      distinguished_secondary_map.swords[i] = virgin_sword;
+
+   /* These entries gradually get overwritten as the used address
+      space expands. */
+   for (i = 0; i < 65536; i++)
+      primary_map[i] = &distinguished_secondary_map;
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Machinery to support sanity checking                   ---*/
+/*--------------------------------------------------------------*/
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+Bool SK_(cheap_sanity_check) ( void )
+{
+   if (VGE_IS_DISTINGUISHED_SM(primary_map[0]) && 
+       VGE_IS_DISTINGUISHED_SM(primary_map[65535]))
+      return True;
+   else
+      return False;
+}
+
+
+/* Verify the distinguished secondary map is still entirely virgin,
+   i.e. nothing has ever been written through it. */
+Bool SK_(expensive_sanity_check)(void)
+{
+   Int i;
+
+   /* Make sure nobody changed the distinguished secondary. */
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      if (distinguished_secondary_map.swords[i].other != virgin_sword.other ||
+          distinguished_secondary_map.swords[i].state != virgin_sword.state)
+         return False;
+
+   return True;
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Instrumentation                                        ---*/
+/*--------------------------------------------------------------*/
+
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+
+/* Create and return an instrumented version of cb_in.  Free cb_in
+   before returning.  Each memory-touching uinstr gets a preceding C
+   call to eraser_mem_read / eraser_mem_write with (address, size);
+   every other uinstr is copied through unchanged. */
+UCodeBlock* SK_(instrument) ( UCodeBlock* cb_in, Addr not_used )
+{
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u_in;
+   Int         t_size = INVALID_TEMPREG;
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+      u_in = &cb_in->instrs[i];
+
+      /* VG_(ppUInstr)(0, u_in); */
+      switch (u_in->opcode) {
+
+         case NOP: case CALLM_S: case CALLM_E:
+            break;
+
+         /* For LOAD, address is in val1 */
+         case LOAD:
+            /* Materialise the access size in a fresh temp, then call
+               eraser_mem_read(addr, size) before the load itself. */
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, (UInt)u_in->size);
+
+            vg_assert(1 == u_in->size || 2 == u_in->size || 4 == u_in->size || 
+                      8 == u_in->size || 10 == u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val1, TempReg, t_size);
+            // SSS: make regparms(2) eventually...
+            uCCall(cb, (Addr) & eraser_mem_read, 2, 0, False);
+            VG_(copyUInstr)(cb, u_in);
+            t_size = INVALID_TEMPREG;
+            break;
+
+         /* For others, address is in val2 */
+         /* NOTE(review): FPU_R is a memory *read* but is instrumented
+            with eraser_mem_write here -- verify this is intended. */
+         case STORE:  case FPU_R:  case FPU_W:
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, (UInt)u_in->size);
+
+            vg_assert(1 == u_in->size || 2 == u_in->size || 4 == u_in->size || 
+                      8 == u_in->size || 10 == u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val2, TempReg, t_size);
+            uCCall(cb, (Addr) & eraser_mem_write, 2, 0, False);
+            VG_(copyUInstr)(cb, u_in);
+            t_size = INVALID_TEMPREG;
+            break;
+
+         default:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Error and suppression handling                               ---*/
+/*--------------------------------------------------------------------*/
+
+/* Suppression kinds (only one). */
+typedef
+   enum {
+      /* Possible data race */
+      EraserSupp
+   }
+   EraserSuppKind;
+
+/* What kind of error it is. */
+typedef
+   enum { 
+      EraserErr 
+   }
+   EraserErrorKind;
+
+
+/* Report a possible race at address 'a'; the string records the access
+   direction and is what duplicate-matching compares (see
+   SK_(eq_SkinError)). */
+static void record_eraser_error ( ThreadId tid, Addr a, Bool is_write )
+{
+   VG_(maybe_record_error)( VG_(get_ThreadState)(tid), EraserErr, a, 
+                            (is_write ? "writing" : "reading"),
+                            /*extra*/NULL);
+}
+
+
+/* Two Eraser errors are considered equal when their description
+   strings match.  Note the error address is deliberately not compared
+   (matching the original behaviour). */
+Bool SK_(eq_SkinError) ( VgRes not_used,
+                          SkinError* e1, SkinError* e2 )
+{
+   vg_assert(EraserErr == e1->ekind && EraserErr == e2->ekind);
+   /* BUG FIX: the old code returned False whenever the pointers
+      differed, which made the strcmp below unreachable; use pointer
+      equality only as a fast path and fall back to a content compare. */
+   if (e1->string == e2->string) return True;
+   if (0 != VG_(strcmp)(e1->string, e2->string)) return False;
+   return True;
+}
+
+
+/* Print one Eraser error followed by its execution context. */
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   vg_assert(EraserErr == err->ekind);
+   VG_(message)(Vg_UserMsg, "Possible data race %s variable at 0x%x",
+                err->string, err->addr );
+   pp_ExeContext();
+}
+
+
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   /* do nothing -- extra field not used, and no need to update */
+}
+
+
+/* Only the suppression name "Eraser" is recognised. */
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if (0 == VG_(strcmp)(name, "Eraser")) {
+      *skind = EraserSupp;
+      return True;
+   } else {
+      return False;
+   }
+}
+
+
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, 
+                                        Int nBuf, SkinSupp* s )
+{
+   /* do nothing -- no extra suppression info present.  Return True to
+      indicate nothing bad happened. */
+   return True;
+}
+
+
+/* With no extra info, any Eraser suppression matches any Eraser error. */
+Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   vg_assert( su->skind == EraserSupp);
+   vg_assert(err->ekind == EraserErr);
+   return True;
+}
+
+
+// SSS: copying mutex's pointer... is that ok?  Could they get deallocated?
+// (does that make sense, deallocating a mutex?)
+/* After a successful lock: switch 'tid' to the lockset equal to its
+   current set plus 'mutex', creating that set if it doesn't exist. */
+static void eraser_post_mutex_lock(ThreadId tid, void* void_mutex)
+{
+   Int i = 1;
+   LockSet*  new_node;
+   LockSet*  p;
+   LockSet** q;
+   pthread_mutex_t* mutex = (pthread_mutex_t*)void_mutex;
+   
+#  if DEBUG_LOCKS
+   VG_(printf)("lock  (%u, %x)\n", tid, mutex);
+#  endif
+
+   vg_assert(tid < VG_N_THREADS &&
+             thread_locks[tid] < M_LOCKSET_TABLE);
+   /* VG_(printf)("LOCK: held %d, new %p\n", thread_locks[tid], mutex); */
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_lock-IN");
+#  endif
+
+   /* Scan slots 1..n_lockset_table-1 for a set equal to (current set
+      + mutex).  Slot 0 (the empty set) can never match because the
+      result always contains 'mutex' -- hence i starts at 1. */
+   while (True) {
+      if (i == M_LOCKSET_TABLE) 
+         VG_(panic)("lockset table full -- increase M_LOCKSET_TABLE");
+
+      /* the lockset didn't already exist */
+      if (i == n_lockset_table) {
+
+         p = lockset_table[thread_locks[tid]];
+         q = &lockset_table[i];
+
+         /* copy the thread's lockset, creating a new list */
+         while (p != NULL) {
+            new_node = VG_(malloc)(sizeof(LockSet));
+            new_node->mutex = p->mutex;
+            *q = new_node;
+            q = &((*q)->next);
+            p = p->next;
+         }
+         (*q) = NULL;
+
+         /* find spot for the new mutex in the new list, keeping it
+            sorted by mutex address */
+         p = lockset_table[i];
+         q = &lockset_table[i];
+         while (NULL != p && mutex > p->mutex) {
+            p = p->next;
+            q = &((*q)->next);
+         }
+
+         /* insert new mutex in new list */
+         new_node = VG_(malloc)(sizeof(LockSet));
+         new_node->mutex = mutex;
+         new_node->next = p;
+         (*q) = new_node;
+
+         p = lockset_table[i];
+         vg_assert(i == n_lockset_table);
+         n_lockset_table++;
+
+#        if DEBUG_NEW_LOCKSETS
+         /* NOTE(review): print_LockSet is not defined in this chunk --
+            possibly a stale name for pp_LockSet. */
+         VG_(printf)("new lockset vector (%d): ", i);
+         print_LockSet(p);
+#        endif
+         
+         goto done;
+
+      } else {
+         /* If this succeeds, the required vector (with the new mutex added)
+          * already exists in the table at position i.  Otherwise, keep
+          * looking. */
+         if (weird_LockSet_equals(lockset_table[thread_locks[tid]],
+                                  lockset_table[i], mutex)) {
+            goto done;
+         }
+      }
+      /* if we get to here, table lockset didn't match the new thread
+       * lockset, so keep looking */
+      i++;
+   }
+
+  done:
+   /* Update the thread's lock vector */
+   thread_locks[tid] = i;
+#  if DEBUG_LOCKS
+   VG_(printf)("tid %u now has lockset %d\n", tid, i);
+#  endif
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_lock-OUT");
+#  endif
+
+}
+
+
+/* After unlock: switch 'tid' to the lockset equal to its current set
+   minus 'mutex'.  An existing table entry is reused when possible;
+   otherwise one is built via remove().  i starts at 0 because the
+   diminished set may legitimately be the empty set (slot 0). */
+static void eraser_post_mutex_unlock(ThreadId tid, void* void_mutex)
+{
+   Int i = 0;
+   pthread_mutex_t* mutex = (pthread_mutex_t*)void_mutex;
+   
+#  if DEBUG_LOCKS
+   VG_(printf)("unlock(%u, %x)\n", tid, mutex);
+#  endif
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_unlock-IN");
+#  endif
+
+   // find the lockset that is the current one minus this mutex; change
+   // thread to use that index.
+   
+   while (True) {
+
+      if (i == n_lockset_table) {
+         /* We can't find a suitable pre-made set, so we'll have to
+            make one. */
+         i = remove ( thread_locks[tid], mutex );
+         break;
+      }
+
+      /* Args are in opposite order to call above, for reverse effect */
+      if (weird_LockSet_equals( lockset_table[i],
+                                lockset_table[thread_locks[tid]], mutex) ) {
+         /* found existing diminished set -- the best outcome. */
+         break;
+      }
+
+      i++;
+   }
+
+   /* Update the thread's lock vector */
+#  if DEBUG_LOCKS
+   VG_(printf)("tid %u reverts from %d to lockset %d\n", 
+               tid, thread_locks[tid], i);
+#  endif
+
+   thread_locks[tid] = i;
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_unlock-OUT");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Checking memory reads and writes
+   ------------------------------------------------------------------ */
+
+/* Behaviour on reads and writes:
+ *
+ *                      VIR          EXCL        SHAR        SH_MOD
+ * ----------------------------------------------------------------
+ * rd/wr, 1st thread |  -            EXCL        -           -
+ * rd, new thread    |  -            SHAR        -           -
+ * wr, new thread    |  -            SH_MOD      -           -
+ * rd                |  error!       -           SHAR        SH_MOD
+ * wr                |  EXCL         -           SH_MOD      SH_MOD
+ * ----------------------------------------------------------------
+ */
+
+#if 0
+/* Debugging aid (currently compiled out): print the shadow-word
+   owner/state for each 4-byte word within +/- 12 bytes of `a`. */
+static 
+void dump_around_a(Addr a)
+{
+   UInt i;
+   shadow_word* sword;
+   VG_(printf)("NEARBY:\n");
+   for (i = a - 12; i <= a + 12; i += 4) {
+      sword = get_sword_addr(i); 
+      VG_(printf)("    %x -- tid: %u, state: %u\n", i, sword->other, sword->state);
+   }
+}
+#endif
+
+/* Find which word the first and last bytes are in (by shifting out bottom 2
+ * bits) then find the difference.  Returns the number of aligned 4-byte
+ * words touched by an access of `size` bytes at address `a`. */
+static __inline__ 
+Int compute_num_words_accessed(Addr a, UInt size) 
+{
+   Int x, y, n_words;
+   x =  a             >> 2;   /* word index of first byte */
+   y = (a + size - 1) >> 2;   /* word index of last byte */
+   n_words = y - x + 1;
+   return n_words;
+}
+
+
+#if DEBUG_ACCESSES
+   #define DEBUG_STATE(args...)   \
+      VG_(printf)("(%u) ", size), \
+      VG_(printf)(args)
+#else
+   #define DEBUG_STATE(args...)
+#endif
+
+
+/* Shadow-state update for a read of `size` bytes starting at `a`,
+   applied one 4-byte shadow word at a time.  Implements the "rd" rows
+   of the state table above:
+     Virgin          -> Excl (first accessor becomes exclusive owner)
+     Excl, same tid  -> unchanged
+     Excl, other tid -> Shar, candidate lockset = reader's current locks
+     Shar / SharMod  -> candidate lockset intersected with reader's locks
+   A SharMod word whose candidate lockset becomes empty is reported as
+   a possible data race. */
+static void eraser_mem_read(Addr a, UInt size)
+{
+   shadow_word* sword;
+   ThreadId tid = VG_(get_current_tid_1_if_root)();
+   Addr     end = a + 4*compute_num_words_accessed(a, size);
+
+   for ( ; a < end; a += 4) {
+
+      sword = get_sword_addr(a);
+      if (sword == SEC_MAP_ACCESS) {
+         VG_(printf)("read distinguished 2ndary map! 0x%x\n", a);
+         continue;
+      }
+
+      switch (sword->state) {
+
+      /* This looks like reading of uninitialised memory, may be legit.  Eg. 
+       * calloc() zeroes its values, so untouched memory may actually be 
+       * initialised.   Leave that stuff to Valgrind.  */
+      case Vge_Virgin:
+         if (TID_INDICATING_NONVIRGIN == sword->other) {
+            DEBUG_STATE("Read  VIRGIN --> EXCL:   %8x, %u\n", a, tid);
+#           if DEBUG_VIRGIN_READS
+            dump_around_a(a);
+#           endif
+         } else {
+            DEBUG_STATE("Read  SPECIAL --> EXCL:  %8x, %u\n", a, tid);
+         }
+         sword->state = Vge_Excl;
+         sword->other = tid;       /* remember exclusive owner */
+         break;
+
+      case Vge_Excl:
+         if (tid == sword->other) {
+            DEBUG_STATE("Read  EXCL:              %8x, %u\n", a, tid);
+
+         } else {
+            /* A second thread is reading: demote to shared, and start
+               tracking a candidate lockset instead of an owner tid. */
+            DEBUG_STATE("Read  EXCL(%u) --> SHAR:  %8x, %u\n", sword->other, a, tid);
+            sword->state = Vge_Shar;
+            sword->other = thread_locks[tid];
+#           if DEBUG_MEM_LOCKSET_CHANGES
+            print_LockSet(lockset_table[sword->other]);
+#           endif
+         }
+         break;
+
+      case Vge_Shar:
+         DEBUG_STATE("Read  SHAR:              %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         break;
+
+      case Vge_SharMod:
+         DEBUG_STATE("Read  SHAR_MOD:          %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+
+         /* Empty candidate lockset: no lock consistently held -> race. */
+         if (lockset_table[sword->other] == NULL) {
+            record_eraser_error(tid, a, False /* !is_write */);
+            n_eraser_warnings++;
+         }
+         break;
+
+      default:
+         VG_(panic)("Unknown eraser state");
+      }
+   }
+}
+
+
+/* Shadow-state update for a write of `size` bytes starting at `a`,
+   applied one 4-byte shadow word at a time.  Implements the "wr" rows
+   of the state table above:
+     Virgin          -> Excl (first accessor becomes exclusive owner)
+     Excl, same tid  -> unchanged
+     Excl, other tid -> SharMod, candidate lockset = writer's locks
+     Shar            -> SharMod, lockset intersected with writer's locks
+     SharMod         -> lockset intersected with writer's locks
+   Any word ending up SharMod with an empty candidate lockset is
+   reported as a possible data race. */
+static void eraser_mem_write(Addr a, UInt size)
+{
+   shadow_word* sword;
+   ThreadId tid = VG_(get_current_tid_1_if_root)();
+   Addr     end = a + 4*compute_num_words_accessed(a, size);
+
+   for ( ; a < end; a += 4) {
+
+      sword = get_sword_addr(a);
+      if (sword == SEC_MAP_ACCESS) {
+         /* Message previously said "read" -- copy-paste from
+            eraser_mem_read; this is the write path. */
+         VG_(printf)("write distinguished 2ndary map! 0x%x\n", a);
+         continue;
+      }
+
+      switch (sword->state) {
+      case Vge_Virgin:
+         if (TID_INDICATING_NONVIRGIN == sword->other)
+            DEBUG_STATE("Write VIRGIN --> EXCL:   %8x, %u\n", a, tid);
+         else
+            DEBUG_STATE("Write SPECIAL --> EXCL:  %8x, %u\n", a, tid);
+         sword->state = Vge_Excl;
+         sword->other = tid;       /* remember exclusive owner */
+         break;
+
+      case Vge_Excl:
+         if (tid == sword->other) {
+            DEBUG_STATE("Write EXCL:              %8x, %u\n", a, tid);
+            break;
+
+         } else {
+            DEBUG_STATE("Write EXCL(%u) --> SHAR_MOD: %8x, %u\n", sword->other, a, tid);
+            sword->state = Vge_SharMod;
+            sword->other = thread_locks[tid];
+#           if DEBUG_MEM_LOCKSET_CHANGES
+            print_LockSet(lockset_table[sword->other]);
+#           endif
+            goto SHARED_MODIFIED;
+         }
+
+      case Vge_Shar:
+         DEBUG_STATE("Write SHAR --> SHAR_MOD: %8x, %u\n", a, tid);
+         sword->state = Vge_SharMod;
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         goto SHARED_MODIFIED;
+
+      case Vge_SharMod:
+         DEBUG_STATE("Write SHAR_MOD:          %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         SHARED_MODIFIED:
+         /* Empty candidate lockset: no lock consistently held -> race. */
+         if (lockset_table[sword->other] == NULL) {
+            record_eraser_error(tid, a, True /* is_write */);
+            n_eraser_warnings++;
+         }
+         break;
+
+      default:
+         VG_(panic)("Unknown eraser state");
+      }
+   }
+}
+
+#undef DEBUG_STATE
+
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+/* Skin initialisation, called by the core before command-line option
+   processing.  Declares the skin's identity and needs, registers the
+   memory-access instrumentation helpers and the event callbacks, then
+   initialises the lockset tables and shadow memory. */
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   Int i;
+
+   needs->name                    = "helgrind";
+   needs->description             = "a data race detector";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+
+   /* Helpers invoked from generated code on every read/write. */
+   VG_(register_compact_helper)((Addr) & eraser_mem_read);
+   VG_(register_compact_helper)((Addr) & eraser_mem_write);
+
+   /* Events to track */
+   track->new_mem_startup       = & eraser_new_mem_startup;
+   track->new_mem_heap          = & eraser_new_mem_heap;
+   track->new_mem_stack         = & make_writable;
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & make_writable;
+   track->new_mem_brk           = & make_writable;
+   track->new_mem_mmap          = & eraser_set_perms;
+
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->change_mem_mprotect   = & eraser_set_perms;
+
+   track->ban_mem_heap          = NULL;
+   track->ban_mem_stack         = NULL;
+
+   track->die_mem_heap          = NULL;
+   track->die_mem_stack         = NULL;
+   track->die_mem_stack_aligned = NULL;
+   track->die_mem_stack_signal  = NULL;
+   track->die_mem_brk           = NULL;
+   track->die_mem_munmap        = NULL;
+
+   track->pre_mem_read          = & eraser_pre_mem_read;
+   track->pre_mem_read_asciiz   = & eraser_pre_mem_read_asciiz;
+   track->pre_mem_write         = & eraser_pre_mem_write;
+   track->post_mem_write        = NULL;
+
+   track->post_mutex_lock       = & eraser_post_mutex_lock;
+   track->post_mutex_unlock     = & eraser_post_mutex_unlock;
+
+   /* Init lock table: every thread starts with lockset 0, the empty
+      lockset. */
+   for (i = 0; i < VG_N_THREADS; i++) 
+      thread_locks[i] = 0 /* the empty lock set */;
+
+   lockset_table[0] = NULL;
+   for (i = 1; i < M_LOCKSET_TABLE; i++) 
+      lockset_table[i] = NULL;
+
+   init_shadow_memory();
+}
+
+
+/* Called by the core after command-line option processing; nothing to
+   do here. */
+void SK_(post_clo_init)(void)
+{
+}
+
+
+/* Final reporting at program exit: optionally dump and sanity-check
+   the lockset table, then report how many possible races were found. */
+void SK_(fini)(void)
+{
+#  if DEBUG_LOCK_TABLE
+   pp_all_LockSets();
+#  endif
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("SK_(fini)");
+#  endif
+   VG_(message)(Vg_UserMsg, "%u possible data races found", n_eraser_warnings);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              vg_eraser.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/valgrind.h b/include/valgrind.h
index 5a819c7..fe880db 100644
--- a/include/valgrind.h
+++ b/include/valgrind.h
@@ -1,27 +1,58 @@
 
 /*
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (valgrind.h) only.  The entire rest of Valgrind is licensed
+   under the terms of the GNU General Public License, version 2.  See
+   the COPYING file in the source distribution for details.
+
+   ----------------------------------------------------------------
+
    This file is part of Valgrind, an x86 protected-mode emulator 
    designed for debugging and profiling binaries on x86-Unixes.
 
-   Copyright (C) 2000-2002 Julian Seward 
-      jseward@acm.org
+   Copyright (C) 2000-2002 Julian Seward.  All rights reserved.
 
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
 
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
+   2. The origin of this software must not be misrepresented; you must 
+      not claim that you wrote the original software.  If you use this 
+      software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
 
-   The GNU General Public License is contained in the file LICENSE.
+   3. Altered source versions must be plainly marked as such, and must
+      not be misrepresented as being the original software.
+
+   4. The name of the author may not be used to endorse or promote 
+      products derived from this software without specific prior written 
+      permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   ----------------------------------------------------------------
+
+   Notice that the above BSD-style license applies to this one file
+   (valgrind.h) only.  The entire rest of Valgrind is licensed under
+   the terms of the GNU General Public License, version 2.  See the
+   COPYING file in the source distribution for details.
+
+   ---------------------------------------------------------------- 
 */
 
 
@@ -31,8 +62,8 @@
 
 /* This file is for inclusion into client (your!) code.
 
-   You can use these macros to manipulate and query memory permissions
-   inside your own programs.
+   You can use these macros to manipulate and query Valgrind's 
+   execution inside your own programs.
 
    The resulting executables will still run without Valgrind, just a
    little bit more slowly than they otherwise would, but otherwise
@@ -84,121 +115,14 @@
 
 /* Some request codes.  There are many more of these, but most are not
    exposed to end-user view.  These are the public ones, all of the
-   form 0x1000 + small_number. 
+   form 0x1000 + small_number.
 */
 
-#define VG_USERREQ__MAKE_NOACCESS        0x1001
-#define VG_USERREQ__MAKE_WRITABLE        0x1002
-#define VG_USERREQ__MAKE_READABLE        0x1003
-#define VG_USERREQ__DISCARD              0x1004
-#define VG_USERREQ__CHECK_WRITABLE       0x1005
-#define VG_USERREQ__CHECK_READABLE       0x1006
-#define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
-#define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
-#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
-
-
-/* Client-code macros to manipulate the state of memory. */
-
-/* Mark memory at _qzz_addr as unaddressible and undefined for
-   _qzz_len bytes.  Returns an int handle pertaining to the block
-   descriptions Valgrind will use in subsequent error messages. */
-#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_NOACCESS,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Similarly, mark memory at _qzz_addr as addressible but undefined
-   for _qzz_len bytes. */
-#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_WRITABLE,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Similarly, mark memory at _qzz_addr as addressible and defined
-   for _qzz_len bytes. */
-#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_READABLE,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Discard a block-description-handle obtained from the above three
-   macros.  After this, Valgrind will no longer be able to relate
-   addressing errors to the user-defined block associated with the
-   handle.  The permissions settings associated with the handle remain
-   in place.  Returns 1 for an invalid handle, 0 for a valid
-   handle. */
-#define VALGRIND_DISCARD(_qzz_blkindex)                          \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__DISCARD,                 \
-                            0, _qzz_blkindex, 0, 0);             \
-    _qzz_res;                                                    \
-   })
-
-
-
-/* Client-code macros to check the state of memory. */
-
-/* Check that memory at _qzz_addr is addressible for _qzz_len bytes.
-   If suitable addressibility is not established, Valgrind prints an
-   error message and returns the address of the first offending byte.
-   Otherwise it returns zero. */
-#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len)                \
-   ({unsigned int _qzz_res;                                        \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__CHECK_WRITABLE,            \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-    _qzz_res;                                                      \
-   })
-
-/* Check that memory at _qzz_addr is addressible and defined for
-   _qzz_len bytes.  If suitable addressibility and definedness are not
-   established, Valgrind prints an error message and returns the
-   address of the first offending byte.  Otherwise it returns zero. */
-#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len)                \
-   ({unsigned int _qzz_res;                                        \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__CHECK_READABLE,            \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-    _qzz_res;                                                      \
-   })
-
-
-/* Use this macro to force the definedness and addressibility of a
-   value to be checked.  If suitable addressibility and definedness
-   are not established, Valgrind prints an error message and returns
-   the address of the first offending byte.  Otherwise it returns
-   zero. */
-#define VALGRIND_CHECK_DEFINED(__lvalue)                           \
-   (void)                                                          \
-   VALGRIND_CHECK_READABLE(                                        \
-      (volatile unsigned char *)&(__lvalue),                       \
-                      (unsigned int)(sizeof (__lvalue)))
-
-
-
-/* Mark memory, intended to be on the client's stack, at _qzz_addr as
-   unaddressible and undefined for _qzz_len bytes.  Does not return a
-   value.  The record associated with this setting will be
-   automatically removed by Valgrind when the containing routine
-   exits. */
-#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len)           \
-   {unsigned int _qzz_res;                                         \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__MAKE_NOACCESS_STACK,       \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-   }
+/* Public client-request codes, numbered upwards from 0x1001; the
+   final dummy entry bounds the enum. */
+typedef
+   enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001,
+          VG_USERREQ__DISCARD_TRANSLATIONS,
+          VG_USERREQ__FINAL_DUMMY_CLIENT_REQUEST,
+   } Vg_ClientRequest;
 
 
 /* Returns 1 if running on Valgrind, 0 if running on the real CPU. 
@@ -212,22 +136,6 @@
    })
 
 
-/* Mark memory, intended to be on the client's stack, at _qzz_addr as
-   unaddressible and undefined for _qzz_len bytes.  Does not return a
-   value.  The record associated with this setting will be
-   automatically removed by Valgrind when the containing routine
-   exits.  
-
-   Currently implemented but untested.
-*/
-#define VALGRIND_DO_LEAK_CHECK                                     \
-   {unsigned int _qzz_res;                                         \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__DO_LEAK_CHECK,             \
-                            0, 0, 0, 0);                           \
-   }
-
-
 /* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
    _qzz_len - 1].  Useful if you are debugging a JITter or some such,
    since it provides a way to make sure valgrind will retranslate the
diff --git a/include/vg_constants_skin.h b/include/vg_constants_skin.h
new file mode 100644
index 0000000..a151cb0
--- /dev/null
+++ b/include/vg_constants_skin.h
@@ -0,0 +1,55 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A header file containing constants (for assembly code).      ---*/
+/*---                                          vg_constants_skin.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_CONSTANTS_SKIN_H
+#define __VG_CONSTANTS_SKIN_H
+
+
+/* All symbols externally visible from valgrind.so are prefixed
+   as specified here.  The prefix can be changed, so as to avoid
+   namespace conflict problems.
+*/
+/* Token-paste the two arguments into a single identifier. */
+#define VGAPPEND(str1,str2) str1##str2
+
+/* These macros should add different prefixes so the same base
+   name can safely be used across different macros. */
+#define VG_(str)    VGAPPEND(vgPlain_,str)
+#define VGP_(str)   VGAPPEND(vgProf_,str)
+#define VGOFF_(str) VGAPPEND(vgOff_,str)
+
+/* Skin specific ones.  Note that final name still starts with "vg". */
+#define SK_(str)    VGAPPEND(vgSkin_,str)
+
+#endif /* ndef __VG_CONSTANTS_SKIN_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                      vg_constants_skin.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/include/vg_profile.c b/include/vg_profile.c
index 34e98d6..4b0288d 100644
--- a/include/vg_profile.c
+++ b/include/vg_profile.c
@@ -1,6 +1,8 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Profiling machinery -- not for release builds!               ---*/
+/*--- Profiling machinery.  #include this file into a skin to      ---*/
+/*--- enable --profile=yes, but not for release versions of skins, ---*/
+/*--- because it uses glibc code.                                  ---*/
 /*---                                                 vg_profile.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -26,46 +28,84 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
-#include "vg_include.h"
+#ifndef __VG_PROFILE_C
+#define __VG_PROFILE_C
 
-#ifdef VG_PROFILE
+#include "vg_skin.h"
 
 /* get rid of these, if possible */
 #include <signal.h>
 #include <sys/time.h>
 
-#define VGP_PAIR(enumname,str) str
-static const Char* vgp_names[VGP_M_CCS] = { VGP_LIST };
-#undef VGP_PAIR
+/* Override the empty definitions from vg_skin.h */
+#undef  VGP_PUSHCC
+#undef  VGP_POPCC
+#define VGP_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#define VGP_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 
+#define VGP_M_STACK     20
+#define VGP_MAX_CCS     50
+
+
+/* All zeroed initially because they're static */
 static Int   vgp_nticks;
-static Int   vgp_counts[VGP_M_CCS];
-static Int   vgp_entries[VGP_M_CCS];
+
+static Int   vgp_counts [VGP_MAX_CCS];
+static Int   vgp_entries[VGP_MAX_CCS];
+static Char* vgp_names  [VGP_MAX_CCS];
 
 static Int   vgp_sp;
-static VgpCC vgp_stack[VGP_M_STACK];
+static UInt  vgp_stack[VGP_M_STACK];
+
+/* These definitions override the panicking ones in vg_skin.h */
+
+/* Record `name` for profile event number `n` in vgp_names[].  Bails
+   out via VG_(skin_error) if the adjusted number exceeds VGP_MAX_CCS
+   or if the slot is already taken (skin and core numbers must not
+   overlap).
+   NOTE(review): after the VgpUnc adjustment, n is checked only
+   against the upper bound; a sufficiently negative n would index out
+   of bounds -- presumably callers never pass one.  Confirm. */
+void VGP_(register_profile_event) ( Int n, Char* name )
+{
+   /* Adjust for negative values */
+   n += VgpUnc;
+   if (n >= VGP_MAX_CCS) {
+      VG_(printf)("\nProfile event #%d higher than VGP_MAX_CCS of %d.\n"
+                  "If you really need this many profile events, increase\n"
+                  "VGP_MAX_CCS and recompile Valgrind.\n",
+                  n, VGP_MAX_CCS);
+      VG_(skin_error)("profile event too high");
+   }
+   if (vgp_names[n] != NULL) {
+      VG_(printf)("\nProfile event #%d being registered as `%s'\n"
+                  "already registered as `%s'.\n"
+                  "Note that skin and core event numbers must not overlap.\n",
+                  n, name, vgp_names[n]);
+      VG_(skin_error)("profile event already registered");
+   }
+
+   vgp_names[n] = name;
+}
 
 void VGP_(tick) ( int sigNo )
 {
    Int cc;
    vgp_nticks++;
    cc = vgp_stack[vgp_sp];
-   vg_assert(cc >= 0 && cc < VGP_M_CCS);
+   vg_assert(cc >= 0 && cc < VGP_MAX_CCS);
    vgp_counts[ cc ]++;
 }
 
 void VGP_(init_profiling) ( void )
 {
    struct itimerval value;
-   Int i, ret;
+   Int ret;
 
-   for (i = 0; i < VGP_M_CCS; i++)
-     vgp_counts[i] = vgp_entries[i] = 0;
+   /* Register core events... tricky macro definition causes
+      VGP_(register_profile_event)() to be called once for each core event
+      in VGP_CORE_LIST. */
+   vg_assert(VgpUnc == 0);
+#  define VGP_PAIR(n,name) VGP_(register_profile_event)(n,name)
+   VGP_CORE_LIST;
+#  undef  VGP_PAIR
 
-   vgp_nticks = 0;
    vgp_sp = -1;
    VGP_(pushcc) ( VgpUnc );
 
@@ -81,30 +121,52 @@
 void VGP_(done_profiling) ( void )
 {
    Int i;
-   VG_(printf)("Profiling done, %d ticks\n", vgp_nticks);
-   for (i = 0; i < VGP_M_CCS; i++)
-      VG_(printf)("%2d: %4d (%3d %%%%) ticks,  %8d entries   for  %s\n",
-                  i, vgp_counts[i], 
-                  (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks),
-                  vgp_entries[i],
-                  vgp_names[i] );
+   VG_(printf)("\nProfiling done, %d ticks\n", vgp_nticks);
+   for (i = 0; i < VGP_MAX_CCS; i++)
+      if (NULL != vgp_names[i])
+         VG_(printf)(
+            "%2d: %4d (%3d %%%%) ticks,  %10d entries   for  %s\n",
+            i, vgp_counts[i], 
+            (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks),
+            vgp_entries[i], vgp_names[i] );
 }
 
-void VGP_(pushcc) ( VgpCC cc )
+void VGP_(pushcc) ( UInt cc )
 {
-   if (vgp_sp >= VGP_M_STACK-1) VG_(panic)("vgp_pushcc");
+   if (vgp_sp >= VGP_M_STACK-1) { 
+      VG_(printf)(
+         "\nMaximum profile stack depth (%d) reached for event #%d (`%s').\n"
+         "This is probably due to a VGP_(pushcc)() without a matching\n"
+         "VGP_(popcc)().  Make sure they all match.\n"
+         "Or if you are nesting profiling events very deeply, increase\n"
+         "VGP_M_STACK and recompile Valgrind.\n",
+         VGP_M_STACK, cc, vgp_names[cc]);
+      VG_(skin_error)("Profiling stack overflow");
+   }
    vgp_sp++;
    vgp_stack[vgp_sp] = cc;
    vgp_entries[ cc ] ++;
 }
 
-void VGP_(popcc) ( void )
+void VGP_(popcc) ( UInt cc )
 {
-   if (vgp_sp <= 0) VG_(panic)("vgp_popcc");
+   if (vgp_sp <= 0) {
+      VG_(printf)(
+         "\nProfile stack underflow.  This is due to a VGP_(popcc)() without\n"
+         "a matching VGP_(pushcc)().  Make sure they all match.\n");
+      VG_(skin_error)("Profiling stack underflow");
+   }
+   if (vgp_stack[vgp_sp] != cc) {
+      Int i;
+      VG_(printf)("popping %s, stack looks like:\n", vgp_names[cc]);
+      for (i = vgp_sp; i >= 0; i--)
+         VG_(printf)("%2d: %s\n", i, vgp_names[vgp_stack[i]]);
+      VG_(exit)(1);
+   }
    vgp_sp--;
 }
 
-#endif /* VG_PROFILE */
+#endif /* __VG_PROFILE_C */
 
 /*--------------------------------------------------------------------*/
 /*--- end                                             vg_profile.c ---*/
diff --git a/include/vg_skin.h b/include/vg_skin.h
new file mode 100644
index 0000000..2ecd3ae
--- /dev/null
+++ b/include/vg_skin.h
@@ -0,0 +1,1397 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The only header your skin will ever need to #include...      ---*/
+/*---                                                    vg_skin.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_SKIN_H
+#define __VG_SKIN_H
+
+#include <stdarg.h>       /* ANSI varargs stuff  */
+#include <setjmp.h>       /* for jmp_buf         */
+
+#include "vg_constants_skin.h"
+
+
+/*====================================================================*/
+/*=== Build options and table sizes.                               ===*/
+/*====================================================================*/
+
+/* You should be able to change these options or sizes, recompile, and 
+   still have a working system. */
+
+/* The maximum number of pthreads that we support.  This is
+   deliberately not very high since our implementation of some of the
+   scheduler algorithms is surely O(N) in the number of threads, since
+   that's simple, at least.  And (in practice) we hope that most
+   programs do not need many threads. */
+#define VG_N_THREADS 50
+
+/* Maximum number of pthread keys available.  Again, we start low until
+   the need for a higher number presents itself. */
+#define VG_N_THREAD_KEYS 50
+
+/* Total number of integer registers available for allocation -- all of
+   them except %esp, %ebp.  %ebp permanently points at VG_(baseBlock).
+   
+   If you change this you'll have to also change at least these:
+     - VG_(rankToRealRegNum)()
+     - VG_(realRegNumToRank)()
+     - ppRegsLiveness()
+     - the RegsLive type (maybe -- RegsLive type must have more than
+                          VG_MAX_REALREGS bits)
+   
+   Do not change this unless you really know what you are doing!  */
+#define VG_MAX_REALREGS 6
+
+
+/*====================================================================*/
+/*=== Basic types                                                  ===*/
+/*====================================================================*/
+
+#define mycat_wrk(aaa,bbb) aaa##bbb
+#define mycat(aaa,bbb) mycat_wrk(aaa,bbb)
+
+typedef unsigned char          UChar;
+typedef unsigned short         UShort;
+typedef unsigned int           UInt;
+typedef unsigned long long int ULong;
+
+typedef signed char            Char;
+typedef signed short           Short;
+typedef signed int             Int;
+typedef signed long long int   Long;
+
+typedef unsigned int           Addr;
+
+typedef unsigned char          Bool;
+#define False                  ((Bool)0)
+#define True                   ((Bool)1)
+
+
+/* ---------------------------------------------------------------------
+   Now the basic types are set up, we can haul in the kernel-interface
+   definitions.
+   ------------------------------------------------------------------ */
+
+#include "./vg_kerneliface.h"
+
+
+/*====================================================================*/
+/*=== Command-line options                                         ===*/
+/*====================================================================*/
+
+/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */
+extern Int   VG_(clo_verbosity);
+
+/* Profile? */
+extern Bool  VG_(clo_profile);
+
+
+/* Call this if a recognised option was bad for some reason.
+   Note: don't use it just because an option was unrecognised -- return 'False'
+   from SKN_(process_cmd_line_option) to indicate that. */
+extern void VG_(bad_option) ( Char* opt );
+
+/* Client args */
+extern Int    VG_(client_argc);
+extern Char** VG_(client_argv);
+
+/* Client environment.  Can be inspected with VG_(getenv)() (below) */
+extern Char** VG_(client_envp);
+
+
+/*====================================================================*/
+/*=== Printing messages for the user                               ===*/
+/*====================================================================*/
+
+/* Print a message prefixed by "??<pid>?? "; '?' depends on the VgMsgKind.
+   Should be used for all user output. */
+
+typedef
+   enum { Vg_UserMsg,         /* '?' == '=' */
+          Vg_DebugMsg,        /* '?' == '-' */
+          Vg_DebugExtraMsg    /* '?' == '+' */
+   }
+   VgMsgKind;
+
+/* Functions for building a message from multiple parts. */
+extern void VG_(start_msg)  ( VgMsgKind kind );
+extern void VG_(add_to_msg) ( Char* format, ... );
+/* Ends and prints the message.  Appends a newline. */
+extern void VG_(end_msg)    ( void );
+
+/* Send a simple, single-part message.  Appends a newline. */
+extern void VG_(message)    ( VgMsgKind kind, Char* format, ... );
+
+
+/*====================================================================*/
+/*=== Profiling                                                    ===*/
+/*====================================================================*/
+
+/* Nb: VGP_(register_profile_event)() relies on VgpUnc being the first one */
+#define VGP_CORE_LIST \
+   /* These ones depend on the core */                \
+   VGP_PAIR(VgpUnc,         "unclassified"),          \
+   VGP_PAIR(VgpRun,         "running"),               \
+   VGP_PAIR(VgpSched,       "scheduler"),             \
+   VGP_PAIR(VgpMalloc,      "low-lev malloc/free"),   \
+   VGP_PAIR(VgpCliMalloc,   "client  malloc/free"),   \
+   VGP_PAIR(VgpStack,       "adjust-stack"),          \
+   VGP_PAIR(VgpTranslate,   "translate-main"),        \
+   VGP_PAIR(VgpToUCode,     "to-ucode"),              \
+   VGP_PAIR(VgpFromUcode,   "from-ucode"),            \
+   VGP_PAIR(VgpImprove,     "improve"),               \
+   VGP_PAIR(VgpRegAlloc,    "reg-alloc"),             \
+   VGP_PAIR(VgpLiveness,    "liveness-analysis"),     \
+   VGP_PAIR(VgpDoLRU,       "do-lru"),                \
+   VGP_PAIR(VgpSlowFindT,   "slow-search-transtab"),  \
+   VGP_PAIR(VgpInitMem,     "init-memory"),           \
+   VGP_PAIR(VgpExeContext,  "exe-context"),           \
+   VGP_PAIR(VgpReadSyms,    "read-syms"),             \
+   VGP_PAIR(VgpSearchSyms,  "search-syms"),           \
+   VGP_PAIR(VgpAddToT,      "add-to-transtab"),       \
+   VGP_PAIR(VgpCoreSysWrap, "core-syscall-wrapper"),  \
+   VGP_PAIR(VgpDemangle,    "demangle"),              \
+   /* These ones depend on the skin */                \
+   VGP_PAIR(VgpPreCloInit,  "pre-clo-init"),          \
+   VGP_PAIR(VgpPostCloInit, "post-clo-init"),         \
+   VGP_PAIR(VgpInstrument,  "instrument"),            \
+   VGP_PAIR(VgpSkinSysWrap, "skin-syscall-wrapper"),  \
+   VGP_PAIR(VgpFini,        "fini")
+
+#define VGP_PAIR(n,name) n
+typedef enum { VGP_CORE_LIST } VgpCoreCC;
+#undef  VGP_PAIR
+
+/* When registering skin profiling events, ensure that the 'n' value is in
+ * the range (VgpFini+1..) */
+extern void VGP_(register_profile_event) ( Int n, Char* name );
+
+extern void VGP_(pushcc) ( UInt cc );
+extern void VGP_(popcc)  ( UInt cc );
+
+/* Define them only if they haven't already been defined by vg_profile.c */
+#ifndef VGP_PUSHCC
+#  define VGP_PUSHCC(x)
+#endif
+#ifndef VGP_POPCC
+#  define VGP_POPCC(x)
+#endif
+
+
+/*====================================================================*/
+/*=== Useful stuff to call from generated code                     ===*/
+/*====================================================================*/
+
+/* ------------------------------------------------------------------ */
+/* General stuff */
+
+/* Get the simulated %esp */
+extern Addr VG_(get_stack_pointer) ( void );
+
+/* Detect if an address is within Valgrind's stack */
+extern Bool VG_(within_stack)(Addr a);
+
+/* Detect if an address is in Valgrind's m_state_static */
+extern Bool VG_(within_m_state_static)(Addr a);
+
+/* Check if an address is 4-byte aligned */
+#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
+
+
+/* ------------------------------------------------------------------ */
+/* Thread-related stuff */
+
+/* Special magic value for an invalid ThreadId.  It corresponds to
+   LinuxThreads using zero as the initial value for
+   pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */
+#define VG_INVALID_THREADID ((ThreadId)(0))
+
+/* ThreadIds are simply indices into the vg_threads[] array. */
+typedef 
+   UInt 
+   ThreadId;
+
+typedef
+   struct _ThreadState
+   ThreadState;
+
+extern ThreadId     VG_(get_current_tid_1_if_root) ( void );
+extern ThreadState* VG_(get_ThreadState)           ( ThreadId tid );
+
+
+/*====================================================================*/
+/*=== Valgrind's version of libc                                   ===*/
+/*====================================================================*/
+
+/* Valgrind doesn't use libc at all, for good reasons (trust us).  So here
+   are its own versions of C library functions, but with VG_ prefixes.  Note
+   that the types of some are slightly different to the real ones.  Some
+   extra useful functions are provided too; descriptions of how they work
+   are given below. */
+
+#if !defined(NULL)
+#  define NULL ((void*)0)
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* stdio.h
+ *
+ * Note that they all output to the file descriptor given by the
+ * --logfile-fd=N argument, which defaults to 2 (stderr).  Hence no
+ * need for VG_(fprintf)().  
+ *
+ * Also note that VG_(printf)() and VG_(vprintf)() are declared below.
+ */
+extern void VG_(printf)  ( const char *format, ... );
+/* too noisy ...  __attribute__ ((format (printf, 1, 2))) ; */
+extern void VG_(sprintf) ( Char* buf, Char *format, ... );
+extern void VG_(vprintf) ( void(*send)(Char), 
+                           const Char *format, va_list vargs );
+
+/* ------------------------------------------------------------------ */
+/* stdlib.h */
+
+extern void* VG_(malloc)         ( Int nbytes );
+extern void  VG_(free)           ( void* ptr );
+extern void* VG_(calloc)         ( Int nmemb, Int nbytes );
+extern void* VG_(realloc)        ( void* ptr, Int size );
+extern void* VG_(malloc_aligned) ( Int req_alignB, Int req_pszB );
+
+extern void  VG_(print_malloc_stats) ( void );
+
+
+extern void  VG_(exit)( Int status )
+             __attribute__ ((__noreturn__));
+/* Print a (panic) message (constant string) appending newline, and abort. */
+extern void  VG_(panic) ( Char* str )
+             __attribute__ ((__noreturn__));
+
+/* Looks up VG_(client_envp) (above) */
+extern Char* VG_(getenv) ( Char* name );
+
+/* Crude stand-in for the glibc system() call. */
+extern Int   VG_(system) ( Char* cmd );
+
+extern Long  VG_(atoll)   ( Char* str );
+
+/* Like atoll(), but converts a number of base 2..36 */
+extern Long  VG_(atoll36) ( UInt base, Char* str );
+
+
+/* ------------------------------------------------------------------ */
+/* ctype.h functions and related */
+extern Bool VG_(isspace) ( Char c );
+extern Bool VG_(isdigit) ( Char c );
+extern Char VG_(toupper) ( Char c );
+
+
+/* ------------------------------------------------------------------ */
+/* string.h */
+extern Int   VG_(strlen)         ( const Char* str );
+extern Char* VG_(strcat)         ( Char* dest, const Char* src );
+extern Char* VG_(strncat)        ( Char* dest, const Char* src, Int n );
+extern Char* VG_(strpbrk)        ( const Char* s, const Char* accept );
+extern Char* VG_(strcpy)         ( Char* dest, const Char* src );
+extern Char* VG_(strncpy)        ( Char* dest, const Char* src, Int ndest );
+extern Int   VG_(strcmp)         ( const Char* s1, const Char* s2 );
+extern Int   VG_(strncmp)        ( const Char* s1, const Char* s2, Int nmax );
+extern Char* VG_(strstr)         ( const Char* haystack, Char* needle );
+extern Char* VG_(strchr)         ( const Char* s, Char c );
+extern Char* VG_(strdup)         ( const Char* s);
+
+/* Like strcmp(),  but stops comparing at any whitespace. */
+extern Int   VG_(strcmp_ws)      ( const Char* s1, const Char* s2 );
+
+/* Like strncmp(), but stops comparing at any whitespace. */
+extern Int   VG_(strncmp_ws)     ( const Char* s1, const Char* s2, Int nmax );
+
+/* Like strncpy(), but if 'src' is longer than 'ndest' inserts a '\0' at the 
+   Nth character. */
+extern void  VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest );
+
+/* Mini-regexp function.  Searches for 'pat' in 'str'.  Supports
+ * meta-symbols '*' and '?'.  '\' escapes meta-symbols. */
+extern Bool  VG_(stringMatch)    ( Char* pat, Char* str );
+
+
+/* ------------------------------------------------------------------ */
+/* math.h */
+/* Returns the base-2 logarithm of its argument. */
+extern Int VG_(log2) ( Int x );
+
+
+/* ------------------------------------------------------------------ */
+/* unistd.h */
+extern Int   VG_(getpid) ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* assert.h */
+/* Asserts permanently enabled -- no turning off with NDEBUG.  Hurrah! */
+#define VG__STRING(__str)  #__str
+
+#define vg_assert(expr)                                               \
+  ((void) ((expr) ? 0 :						      \
+	   (VG_(assert_fail) (VG__STRING(expr),			      \
+			      __FILE__, __LINE__,                     \
+                              __PRETTY_FUNCTION__), 0)))
+
+extern void VG_(assert_fail) ( Char* expr, Char* file, 
+                               Int line, Char* fn )
+            __attribute__ ((__noreturn__));
+
+
+/* ------------------------------------------------------------------ */
+/* Reading and writing files. */
+
+/* As per the system calls */
+extern Int  VG_(open)  ( const Char* pathname, Int flags, Int mode );
+extern Int  VG_(read)  ( Int fd, void* buf, Int count);
+extern Int  VG_(write) ( Int fd, void* buf, Int count);
+extern void VG_(close) ( Int fd );
+
+extern Int  VG_(stat)  ( Char* file_name, struct vki_stat* buf );
+
+
+/* ------------------------------------------------------------------ */
+/* mmap and related functions ... */
+extern void* VG_(mmap)( void* start, UInt length, 
+                        UInt prot, UInt flags, UInt fd, UInt offset );
+extern Int  VG_(munmap)( void* start, Int length );
+
+/* Get memory by anonymous mmap. */
+extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
+
+
+/* ------------------------------------------------------------------ */
+/* signal.h.  
+  
+   Note that these use the vk_ (kernel) structure
+   definitions, which are different in places from those that glibc
+   defines -- hence the 'k' prefix.  Since we're operating right at the
+   kernel interface, glibc's view of the world is entirely irrelevant. */
+
+/* --- Signal set ops --- */
+extern Int  VG_(ksigfillset)( vki_ksigset_t* set );
+extern Int  VG_(ksigemptyset)( vki_ksigset_t* set );
+
+extern Bool VG_(kisfullsigset)( vki_ksigset_t* set );
+extern Bool VG_(kisemptysigset)( vki_ksigset_t* set );
+
+extern Int  VG_(ksigaddset)( vki_ksigset_t* set, Int signum );
+extern Int  VG_(ksigdelset)( vki_ksigset_t* set, Int signum );
+extern Int  VG_(ksigismember) ( vki_ksigset_t* set, Int signum );
+
+extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, 
+                                      vki_ksigset_t* src );
+extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, 
+                                      vki_ksigset_t* src );
+
+/* --- Mess with the kernel's sig state --- */
+extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, 
+                                       vki_ksigset_t* oldset );
+extern Int VG_(ksigaction) ( Int signum,  
+                             const vki_ksigaction* act,  
+                             vki_ksigaction* oldact );
+
+extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int));
+
+extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss );
+
+extern Int VG_(kill)( Int pid, Int signo );
+extern Int VG_(sigpending) ( vki_ksigset_t* set );
+
+
+/*====================================================================*/
+/*=== UCode definition                                             ===*/
+/*====================================================================*/
+
+/* Tags which describe what operands are. */
+typedef
+   enum { TempReg=0, ArchReg=1, RealReg=2, 
+          SpillNo=3, Literal=4, Lit16=5, 
+          NoValue=6 }
+   Tag;
+
+/* Invalid register numbers :-) */
+#define INVALID_TEMPREG 999999999
+#define INVALID_REALREG 999999999
+
+/* Microinstruction opcodes. */
+typedef
+   enum {
+      NOP,
+      GET,
+      PUT,
+      LOAD,
+      STORE,
+      MOV,
+      CMOV, /* Used for cmpxchg and cmov */
+      WIDEN,
+      JMP,
+
+      /* Read/write the %EFLAGS register into a TempReg. */
+      GETF, PUTF,
+
+      ADD, ADC, AND, OR,  XOR, SUB, SBB,
+      SHL, SHR, SAR, ROL, ROR, RCL, RCR,
+      NOT, NEG, INC, DEC, BSWAP,
+      CC2VAL,
+
+      /* Not strictly needed, but useful for making better
+         translations of address calculations. */
+      LEA1,  /* reg2 := const + reg1 */
+      LEA2,  /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */
+
+      /* not for translating x86 calls -- only to call helpers */
+      CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences
+                           for CALLM. */
+      PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */
+      CALLM,  /* call to a machine-code helper */
+
+      /* For calling C functions of up to three arguments (or two if the
+         functions has a return value).  Arguments and return value must be
+         word-sized.  If you want to pass more arguments than this to a C
+         function you have to use global variables to fake it (eg. use
+         VG_(set_global_var)()).
+
+         Seven possibilities: 'arg1..3' show where args go, 'ret' shows
+         where return values go.
+        
+         CCALL(-,    -,    -   )    void f(void)
+         CCALL(arg1, -,    -   )    void f(UInt arg1)
+         CCALL(arg1, arg2, -   )    void f(UInt arg1, UInt arg2)
+         CCALL(arg1, arg2, arg3)    void f(UInt arg1, UInt arg2, UInt arg3)
+         CCALL(-,    -,    ret )    UInt f(void)
+         CCALL(arg1, -,    ret )    UInt f(UInt arg1)
+         CCALL(arg1, arg2, ret )    UInt f(UInt arg1, UInt arg2)
+       */
+      CCALL,
+
+      /* Hack for translating string (REP-) insns.  Jump to literal if
+         TempReg/RealReg is zero. */
+      JIFZ,
+
+      /* FPU ops which read/write mem or don't touch mem at all. */
+      FPU_R,
+      FPU_W,
+      FPU,
+
+      /* Advance the simulated %eip by some small (< 128) number. */
+      INCEIP,
+
+      /* Makes it easy for extended-UCode ops by doing:
+
+           enum { EU_OP1 = DUMMY_FINAL_OP + 1, ... } 
+   
+         WARNING: Do not add new opcodes after this one!  They can be added
+         before, though. */
+      DUMMY_FINAL_UOPCODE
+   }
+   Opcode;
+
+
+/* Condition codes, observing the Intel encoding.  CondAlways is an
+   extra. */
+typedef
+   enum {
+      CondO      = 0,  /* overflow           */
+      CondNO     = 1,  /* no overflow        */
+      CondB      = 2,  /* below              */
+      CondNB     = 3,  /* not below          */
+      CondZ      = 4,  /* zero               */
+      CondNZ     = 5,  /* not zero           */
+      CondBE     = 6,  /* below or equal     */
+      CondNBE    = 7,  /* not below or equal */
+      CondS      = 8,  /* negative           */
+      ConsNS     = 9,  /* not negative; NB(review): `ConsNS' is presumably a typo for `CondNS' -- confirm other files before renaming */
+      CondP      = 10, /* parity even        */
+      CondNP     = 11, /* not parity even    */
+      CondL      = 12, /* jump less          */
+      CondNL     = 13, /* not less           */
+      CondLE     = 14, /* less or equal      */
+      CondNLE    = 15, /* not less or equal  */
+      CondAlways = 16  /* Jump always        */
+   } 
+   Condcode;
+
+
+/* Descriptions of additional properties of *unconditional* jumps. */
+typedef
+   enum {
+     JmpBoring=0,   /* boring unconditional jump */
+     JmpCall=1,     /* jump due to an x86 call insn */
+     JmpRet=2,      /* jump due to an x86 ret insn */
+     JmpSyscall=3,  /* do a system call, then jump */
+     JmpClientReq=4 /* do a client request, then jump */
+   }
+   JmpKind;
+
+
+/* Flags.  User-level code can only read/write O(verflow), S(ign),
+   Z(ero), A(ux-carry), C(arry), P(arity), and may also write
+   D(irection).  That's a total of 7 flags.  A FlagSet is a bitset,
+   thusly: 
+      76543210
+       DOSZACP
+   and bit 7 must always be zero since it is unused.
+*/
+typedef UChar FlagSet;
+
+#define FlagD (1<<6)
+#define FlagO (1<<5)
+#define FlagS (1<<4)
+#define FlagZ (1<<3)
+#define FlagA (1<<2)
+#define FlagC (1<<1)
+#define FlagP (1<<0)
+
+#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP)
+#define FlagsOSZAP  (FlagO | FlagS | FlagZ | FlagA |         FlagP)
+#define FlagsOSZCP  (FlagO | FlagS | FlagZ |         FlagC | FlagP)
+#define FlagsOSACP  (FlagO | FlagS |         FlagA | FlagC | FlagP)
+#define FlagsSZACP  (        FlagS | FlagZ | FlagA | FlagC | FlagP)
+#define FlagsSZAP   (        FlagS | FlagZ | FlagA |         FlagP)
+#define FlagsZCP    (                FlagZ         | FlagC | FlagP)
+#define FlagsOC     (FlagO |                         FlagC        )
+#define FlagsAC     (                        FlagA | FlagC        )
+
+#define FlagsALL    (FlagsOSZACP | FlagD)
+#define FlagsEmpty  (FlagSet)0
+
+
+/* Liveness of general purpose registers, useful for code generation.
+   Reg rank order 0..N-1 corresponds to bits 0..N-1, ie. first
+   reg's liveness in bit 0, last reg's in bit N-1.  Note that
+   these rankings don't match the Intel register ordering. */
+typedef UInt RRegSet;
+
+#define ALL_RREGS_DEAD      0                           /* 0000...00b */
+#define ALL_RREGS_LIVE      ((1 << VG_MAX_REALREGS)-1)  /* 0011...11b: all regs live */
+#define UNIT_RREGSET(rank)  (1 << (rank))
+
+#define IS_RREG_LIVE(rank,rregs_live) (rregs_live & UNIT_RREGSET(rank))
+#define SET_RREG_LIVENESS(rank,rregs_live,b)       \
+   do { RRegSet unit = UNIT_RREGSET(rank);         \
+        if (b) rregs_live |= unit;                 \
+        else   rregs_live &= ~unit;                \
+   } while(0)
+
+
+/* A Micro (u)-instruction. */
+typedef
+   struct {
+      /* word 1 */
+      UInt    lit32;      /* 32-bit literal */
+
+      /* word 2 */
+      UShort  val1;       /* first operand */
+      UShort  val2;       /* second operand */
+
+      /* word 3 */
+      UShort  val3;       /* third operand */
+      UChar   opcode;     /* opcode */
+      UChar   size;       /* data transfer size */
+
+      /* word 4 */
+      FlagSet flags_r;    /* :: FlagSet */
+      FlagSet flags_w;    /* :: FlagSet */
+      UChar   tag1:4;     /* first  operand tag */
+      UChar   tag2:4;     /* second operand tag */
+      UChar   tag3:4;     /* third  operand tag */
+      UChar   extra4b:4;  /* Spare field, used by WIDEN for src
+                             -size, and by LEA2 for scale (1,2,4 or 8),
+                             and by JMPs for original x86 instr size */
+
+      /* word 5 */
+      UChar   cond;            /* condition, for jumps */
+      Bool    signed_widen:1;  /* signed or unsigned WIDEN ? */
+      JmpKind jmpkind:3;       /* additional properties of unconditional JMP */
+
+      /* Additional properties for UInstrs that call C functions:  
+           - CCALL
+           - PUT (when %ESP is the target)
+           - possibly skin-specific UInstrs
+      */
+      UChar   argc:2;          /* Number of args, max 3 */
+      UChar   regparms_n:2;    /* Number of args passed in registers */
+      Bool    has_ret_val:1;   /* Function has return value? */
+
+      /* RealReg liveness;  only sensical after reg alloc and liveness
+         analysis done.  This info is a little bit arch-specific --
+         VG_MAX_REALREGS can vary on different architectures.  Note that
+         to use this information requires converting between register ranks
+         and the Intel register numbers, using VG_(realRegNumToRank)()
+         and/or VG_(rankToRealRegNum)() */
+      RRegSet regs_live_after:VG_MAX_REALREGS; 
+   }
+   UInstr;
+
+
+/* Expandable arrays of uinstrs. */
+typedef 
+   struct { 
+      Int     used; 
+      Int     size; 
+      UInstr* instrs;
+      Int     nextTemp;
+   }
+   UCodeBlock;
+
+
+/*====================================================================*/
+/*=== Instrumenting UCode                                          ===*/
+/*====================================================================*/
+
+/* A structure for communicating TempReg and RealReg uses of UInstrs. */
+typedef
+   struct {
+      Int   num;
+      Bool  isWrite;
+   }
+   RegUse;
+
+/* Find what this instruction does to its regs.  Tag indicates whether we're
+ * considering TempRegs (pre-reg-alloc) or RealRegs (post-reg-alloc).
+ * Useful for analysis/optimisation passes. */
+extern Int  VG_(getRegUsage) ( UInstr* u, Tag tag, RegUse* arr );
+
+
+/* ------------------------------------------------------------------ */
+/* Used to register helper functions to be called from generated code */
+extern void VG_(register_compact_helper)    ( Addr a );
+extern void VG_(register_noncompact_helper) ( Addr a );
+
+
+/* ------------------------------------------------------------------ */
+/* Virtual register allocation */
+
+/* Get a new virtual register */
+extern Int   VG_(getNewTemp)     ( UCodeBlock* cb );
+
+/* Get a new virtual shadow register */
+extern Int   VG_(getNewShadow)   ( UCodeBlock* cb );
+
+/* Get a virtual register's corresponding virtual shadow register */
+#define SHADOW(tempreg)  ((tempreg)+1)
+
+
+/* ------------------------------------------------------------------ */
+/* Low-level UInstr builders */
+extern void VG_(newNOP)     ( UInstr* u );
+extern void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz );
+extern void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                               Tag tag1, UInt val1 );
+extern void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                              Tag tag1, UInt val1,
+                              Tag tag2, UInt val2 );
+extern void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                              Tag tag1, UInt val1,
+                              Tag tag2, UInt val2,
+                              Tag tag3, UInt val3 );
+extern void VG_(setFlagRW)  ( UInstr* u, 
+                               FlagSet fr, FlagSet fw );
+extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
+extern void VG_(setCCallFields)  ( UCodeBlock* cb, Addr fn, UChar argc,
+                                   UChar regparms_n, Bool has_ret_val );
+
+extern void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr );
+
+extern Bool VG_(anyFlagUse) ( UInstr* u );
+
+/* Refer to `the last instruction stuffed in' (can be lvalue). */
+#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1]
+
+
+/* ------------------------------------------------------------------ */
+/* Higher-level UInstr sequence builders */
+extern void VG_(callHelper_0_0) ( UCodeBlock* cb, Addr f);
+extern void VG_(callHelper_1_0) ( UCodeBlock* cb, Addr f, UInt arg1,
+                                  UInt regparms_n);
+extern void VG_(callHelper_2_0) ( UCodeBlock* cb, Addr f, UInt arg1, UInt arg2,
+                                  UInt regparms_n);
+
+/* One way around the 3-arg C function limit is to pass args via global
+ * variables... ugly, but it works. */
+void VG_(set_global_var) ( UCodeBlock* cb, Addr globvar_ptr, UInt val);
+
+/* ------------------------------------------------------------------ */
+/* UCode pretty/ugly printing, to help debugging skins;  but only useful
+   if VG_(needs).extended_UCode == True. */
+
+/* When True, all generated code is/should be printed. */
+extern Bool  VG_(print_codegen);
+
+extern void  VG_(ppUCodeBlock)     ( UCodeBlock* cb, Char* title );
+extern void  VG_(ppUInstr)         ( Int instrNo, UInstr* u );
+extern void  VG_(ppUInstrWithRegs) ( Int instrNo, UInstr* u );
+extern void  VG_(upUInstr)         ( Int instrNo, UInstr* u );
+extern Char* VG_(nameUOpcode)      ( Bool upper, Opcode opc );
+extern void  VG_(ppUOperand)       ( UInstr* u, Int operandNo, 
+                                     Int sz, Bool parens );
+
+/* ------------------------------------------------------------------ */
+/* Allocating/freeing basic blocks of UCode */
+extern UCodeBlock* VG_(allocCodeBlock) ( void );
+extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
+
+/*====================================================================*/
+/*=== Functions for generating x86 code from UCode                 ===*/
+/*====================================================================*/
+
+/* These are only of interest for skins where 
+   VG_(needs).extends_UCode == True. */
+
+/* This is the Intel register encoding. */
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL (0+R_EAX)
+#define R_CL (0+R_ECX)
+#define R_DL (0+R_EDX)
+#define R_BL (0+R_EBX)
+#define R_AH (4+R_EAX)
+#define R_CH (4+R_ECX)
+#define R_DH (4+R_EDX)
+#define R_BH (4+R_EBX)
+
+/* For pretty printing x86 code */
+extern Char* VG_(nameOfIntReg)   ( Int size, Int reg );
+extern Char  VG_(nameOfIntSize)  ( Int size );
+
+/* Randomly useful things */
+extern UInt  VG_(extend_s_8to32) ( UInt x );
+
+/* Code emitters */
+extern void VG_(emitB)  ( UInt b );
+extern void VG_(emitW)  ( UInt w );
+extern void VG_(emitL)  ( UInt l );
+extern void VG_(newEmit)( void );
+
+/* Finding offsets */
+extern Int  VG_(helper_offset)     ( Addr a );
+extern Int  VG_(shadowRegOffset)   ( Int arch );
+extern Int  VG_(shadowFlagsOffset) ( void );
+
+/* Converting reg ranks <-> Intel register ordering, for using register
+   liveness info */
+extern Int VG_(realRegNumToRank) ( Int realReg );
+extern Int VG_(rankToRealRegNum) ( Int rank    );
+
+/* Subroutine calls */
+/* This one just calls it. */
+void VG_(synth_call) ( Bool ensure_shortform, Int word_offset );
+
+/* This one is good for calling C functions -- saves caller save regs,
+   pushes args, calls, clears the stack, restores caller save regs.
+   `fn' must be registered in the baseBlock first.  Acceptable tags are
+   RealReg and Literal.  
+
+   WARNING:  a UInstr should *not* be translated with synth_ccall followed
+   by some other x86 assembly code;  this will confuse
+   vg_ccall_reg_save_analysis() and everything will fall over.
+*/
+void VG_(synth_ccall) ( Addr fn, Int argc, Int regparms_n, UInt argv[],
+                        Tag tagv[], Int ret_reg, 
+                        RRegSet regs_live_before, RRegSet regs_live_after );
+
+/* Addressing modes */
+void VG_(emit_amode_offregmem_reg) ( Int off, Int regmem, Int reg );
+void VG_(emit_amode_ereg_greg)     ( Int e_reg, Int g_reg );
+
+/* v-size (4, or 2 with OSO) insn emitters */
+void VG_(emit_movv_offregmem_reg) ( Int sz, Int off, Int areg, Int reg );
+void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg );
+void VG_(emit_movv_reg_reg)       ( Int sz, Int reg1, Int reg2 );
+void VG_(emit_nonshiftopv_lit_reg)( Int sz, Opcode opc, UInt lit, Int reg );
+void VG_(emit_shiftopv_lit_reg)   ( Int sz, Opcode opc, UInt lit, Int reg );
+void VG_(emit_nonshiftopv_reg_reg)( Int sz, Opcode opc, Int reg1, Int reg2 );
+void VG_(emit_movv_lit_reg)       ( Int sz, UInt lit, Int reg );
+void VG_(emit_unaryopv_reg)       ( Int sz, Opcode opc, Int reg );
+void VG_(emit_pushv_reg)          ( Int sz, Int reg );
+void VG_(emit_popv_reg)           ( Int sz, Int reg );
+
+void VG_(emit_pushl_lit32)        ( UInt int32 );
+void VG_(emit_pushl_lit8)         ( Int lit8 );
+void VG_(emit_cmpl_zero_reg)      ( Int reg );
+void VG_(emit_swapl_reg_EAX)      ( Int reg );
+void VG_(emit_movv_lit_offregmem) ( Int sz, UInt lit, Int off, Int memreg );
+
+/* b-size (1 byte) instruction emitters */
+void VG_(emit_movb_lit_offregmem) ( UInt lit, Int off, Int memreg );
+void VG_(emit_movb_reg_offregmem) ( Int reg, Int off, Int areg );
+void VG_(emit_unaryopb_reg)       ( Opcode opc, Int reg );
+void VG_(emit_testb_lit_reg)      ( UInt lit, Int reg );
+
+/* zero-extended load emitters */
+void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg );
+void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg );
+
+/* misc instruction emitters */
+void VG_(emit_call_reg)         ( Int reg );
+void VG_(emit_add_lit_to_esp)   ( Int lit );
+void VG_(emit_jcondshort_delta) ( Condcode cond, Int delta );
+void VG_(emit_pushal)           ( void );
+void VG_(emit_popal)            ( void );
+void VG_(emit_AMD_prefetch_reg) ( Int reg );
+
+
+/*====================================================================*/
+/*=== Execution contexts                                           ===*/
+/*====================================================================*/
+
+/* Generic resolution type used in a few different ways, such as deciding
+   how closely to compare two errors for equality. */
+typedef 
+   enum { Vg_LowRes, Vg_MedRes, Vg_HighRes } 
+   VgRes;
+
+typedef
+   struct _ExeContext
+   ExeContext;
+
+/* Compare two ExeContexts, just comparing the top two callers. */
+extern Bool VG_(eq_ExeContext) ( VgRes res,
+                                 ExeContext* e1, ExeContext* e2 );
+
+/* Print an ExeContext. */
+extern void VG_(pp_ExeContext) ( ExeContext* );
+
+/* Take a snapshot of the client's stack.  Search our collection of
+   ExeContexts to see if we already have it, and if not, allocate a
+   new one.  Either way, return a pointer to the context. */
+extern ExeContext* VG_(get_ExeContext) ( ThreadState *tst );
+
+
+/*====================================================================*/
+/*=== Error reporting                                              ===*/
+/*====================================================================*/
+
+/* ------------------------------------------------------------------ */
+/* Suppressions describe errors which we want to suppress, ie, not 
+   show the user, usually because it is caused by a problem in a library
+   which we can't fix, replace or work around.  Suppressions are read from 
+   a file at startup time, specified by vg_clo_suppressions, and placed in
+   the vg_suppressions list.  This gives flexibility so that new
+   suppressions can be added to the file as and when needed.
+*/
+
+typedef
+   Int         /* Do not make this unsigned! */
+   SuppKind;
+
+/* An extensible (via the 'extra' field) suppression record.  This holds
+   the suppression details of interest to a skin.  Skins can use a normal
+   enum (with element values in the normal range (0..)) for `skind'. 
+
+   If VG_(needs).skin_errors==True, for each suppression read in by core
+   SK_(recognised_suppression)() and SK_(read_extra_suppression_info)() will
+   be called.  The `skind' field is filled in by the value returned in the
+   argument of the first function;  the second function can fill in the
+   `string' and `extra' fields if it wants. 
+*/
+typedef
+   struct {
+      /* What kind of suppression.  Must use the range (0..) */
+      SuppKind skind;
+      /* String -- use is optional.  NULL by default. */
+      Char* string;
+      /* Anything else -- use is optional.  NULL by default. */
+      void* extra;
+   }
+   SkinSupp;
+
+
+/* ------------------------------------------------------------------ */
+/* Error records contain enough info to generate an error report.  The idea
+   is that (typically) the same few points in the program generate thousands
+   of illegal accesses, and we don't want to spew out a fresh error message
+   for each one.  Instead, we use these structures to common up duplicates.
+*/
+
+typedef
+   Int         /* Do not make this unsigned! */
+   ErrorKind;
+
+/* An extensible (via the 'extra' field) error record.  This holds
+   the error details of interest to a skin.  Skins can use a normal
+   enum (with element values in the normal range (0..)) for `ekind'. 
+
+   When errors are found and recorded with VG_(maybe_record_error)(), all
+   the skin must do is pass in the four parameters;  core will
+   allocate/initialise the error record.
+*/
+typedef
+   struct {
+      /* Used by ALL.  Must be in the range (0..) */
+      Int ekind;
+      /* Used frequently */
+      Addr addr;
+      /* Used frequently */
+      Char* string;
+      /* For any skin-specific extras: size and the extra fields */
+      void* extra;
+   }
+   SkinError;
+
+
+/* ------------------------------------------------------------------ */
+/* Call this when an error occurs.  It will be recorded if it's not been
+   seen before.  If it has, the existing error record will have its count
+   incremented.  
+   
+   If the error occurs in generated code, 'tst' should be NULL.  If the
+   error occurs in non-generated code, 'tst' should be non-NULL.  The
+   `extra' field can be stack-allocated;  it will be copied (using
+   SK_(dup_extra_and_update)()) if needed.  But it won't be copied
+   if it's NULL.
+*/
+extern void VG_(maybe_record_error) ( ThreadState* tst, ErrorKind ekind, 
+                                      Addr a, Char* s, void* extra );
+
+/* Gets a non-blank, non-comment line of at most nBuf chars from fd.
+   Skips leading spaces on the line.  Returns True if EOF was hit instead. 
+   Useful for reading in extra skin-specific suppression lines.
+*/
+extern Bool VG_(getLine) ( Int fd, Char* buf, Int nBuf );
+
+
+/*====================================================================*/
+/*=== Obtaining debug information                                  ===*/
+/*====================================================================*/
+
+/* Get the file/function/line number of the instruction at address 'a'. 
+   For these four, if debug info for the address is found, it copies the
+   info into the buffer/UInt and returns True.  If not, it returns False and
+   nothing is copied.  VG_(get_fnname) always demangles C++ function names.
+*/
+extern Bool VG_(get_filename) ( Addr a, Char* filename, Int n_filename );
+extern Bool VG_(get_fnname)   ( Addr a, Char* fnname,   Int n_fnname   );
+extern Bool VG_(get_linenum)  ( Addr a, UInt* linenum );
+
+/* This one is more efficient if getting both filename and line number,
+   because the two lookups are done together. */
+extern Bool VG_(get_filename_linenum) 
+                              ( Addr a, Char* filename, Int n_filename,
+                                        UInt* linenum );
+
+/* Succeeds only if we find from debug info that 'a' is the address of the
+   first instruction in a function -- as opposed to VG_(get_fnname) which
+   succeeds if we find from debug info that 'a' is the address of any
+   instruction in a function.  Use this to instrument the start of
+   a particular function.  Nb: if a executable/shared object is stripped
+   of its symbols, this function will not be able to recognise function
+   entry points within it. */
+extern Bool VG_(get_fnname_if_entry) ( Addr a, Char* filename, Int n_filename );
+
+/* Succeeds if the address is within a shared object or the main executable.
+   It doesn't matter if debug info is present or not. */
+extern Bool VG_(get_objname)  ( Addr a, Char* objname,  Int n_objname  );
+
+
+/*====================================================================*/
+/*=== Shadow chunks and block-finding                              ===*/
+/*====================================================================*/
+
+typedef
+   enum { 
+      Vg_AllocMalloc = 0,
+      Vg_AllocNew    = 1,
+      Vg_AllocNewVec = 2 
+   }
+   VgAllocKind;
+
+/* Description of a malloc'd chunk.  skin_extra[] part can be used by
+   the skin;  size of array is given by VG_(needs).sizeof_shadow_block. */
+typedef 
+   struct _ShadowChunk {
+      struct _ShadowChunk* next;
+      UInt          size : 30;      /* size requested                   */
+      VgAllocKind   allockind : 2;  /* which wrapper did the allocation */
+      Addr          data;           /* ptr to actual block              */
+      UInt          skin_extra[0];  /* extra skin-specific info         */
+   } 
+   ShadowChunk;
+
+/* Use this to free blocks if VG_(needs).alternative_free == True. 
+   It frees the ShadowChunk and the malloc'd block it points to. */
+extern void VG_(freeShadowChunk) ( ShadowChunk* sc );
+
+/* Makes an array of pointers to all the shadow chunks of malloc'd blocks */
+extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows );
+
+/* Determines if address 'a' is within the bounds of the block at start.
+   Allows a little 'slop' round the edges. */
+extern Bool VG_(addr_is_in_block) ( Addr a, Addr start, UInt size );
+
+/* Searches through currently malloc'd blocks until a matching one is found.
+   Returns NULL if none match.  Extra arguments can be implicitly passed to
+   p using nested functions; see vg_memcheck_errcontext.c for an example. */
+extern ShadowChunk* VG_(any_matching_mallocd_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ));
+
+/* Searches through all thread's stacks to see if any match.  Returns
+ * VG_INVALID_THREADID if none match. */
+extern ThreadId VG_(any_matching_thread_stack)
+                        ( Bool (*p) ( Addr stack_min, Addr stack_max ));
+
+/*====================================================================*/
+/*=== Skin-specific stuff                                          ===*/
+/*====================================================================*/
+
+/* Skin-specific settings.
+ *
+ * If new fields are added to this type, update:
+ *  - vg_main.c:VG_(needs) initialisation
+ *  - vg_main.c:sanity_check_needs()
+ *
+ * If the name of this type or any of its fields change, update:
+ *  - dependent comments (just search for "VG_(needs)"). 
+ */
+typedef
+   struct {
+      /* name and description used in the startup message */
+      Char* name;
+      Char* description;
+
+      /* Booleans that decide core behaviour */
+
+      /* Want to have errors detected by Valgrind's core reported?  Includes:
+         - pthread API errors (many;  eg. unlocking a non-locked mutex)
+         - silly arguments to malloc() et al (eg. negative size)
+         - invalid file descriptors to blocking syscalls read() and write()
+         - bad signal numbers passed to sigaction()
+         - attempt to install signal handler for SIGKILL or SIGSTOP */  
+      Bool core_errors;
+      /* Want to report errors from the skin?  This implies use of
+         suppressions, too. */
+      Bool skin_errors;
+
+      /* Should __libc_freeres() be run?  Bugs in it crash the skin. */
+      Bool run_libc_freeres;
+
+      /* Booleans that indicate extra operations are defined;  if these are
+         True, the corresponding template functions (given below) must be
+         defined.  A lot like being a member of a type class. */
+
+      /* Is information kept about specific individual basic blocks?  (Eg. for
+         cachesim there are cost-centres for every instruction, stored at a
+         basic block level.)  If so, it sometimes has to be discarded, because
+         .so mmap/munmap-ping or self-modifying code (informed by the
+         DISCARD_TRANSLATIONS user request) can cause one instruction address
+         to store information about more than one instruction in one program
+         run!  */
+      Bool basic_block_discards;
+
+      /* Maintains information about each register? */
+      Bool shadow_regs;
+
+      /* Skin defines its own command line options? */
+      Bool command_line_options;
+      /* Skin defines its own client requests? */
+      Bool client_requests;
+
+      /* Skin defines its own UInstrs? */
+      Bool extended_UCode;
+
+      /* Skin does stuff before and/or after system calls? */
+      Bool syscall_wrapper;
+
+      /* Size, in words, of extra info about malloc'd blocks recorded by
+         skin.  Be careful to get this right or you'll get seg faults! */
+      UInt sizeof_shadow_block;
+
+      /* Skin does free()s itself? */
+      Bool alternative_free;
+
+      /* Are skin-state sanity checks performed? */
+      Bool sanity_checks;
+   } 
+   VgNeeds;
+
+extern VgNeeds VG_(needs);
+
+
+/* ------------------------------------------------------------------ */
+/* Core events to track */
+
+/* Part of the core from which this call was made.  Useful for determining
+ * what kind of error message should be emitted. */
+typedef 
+   enum { Vg_CorePThread, Vg_CoreSignal, Vg_CoreSysCall, Vg_CoreTranslate }
+   CorePart;
+
+/* Events happening in core to track.  To be notified, assign a function
+ * to the function pointer.  To ignore an event, don't do anything
+ * (default assignment is to NULL in which case the call is skipped). */
+typedef
+   struct {
+      /* Memory events */
+      void (*new_mem_startup)( Addr a, UInt len, Bool rr, Bool ww, Bool xx );
+      void (*new_mem_heap)   ( Addr a, UInt len, Bool is_inited );
+      void (*new_mem_stack)  ( Addr a, UInt len );
+      void (*new_mem_stack_aligned) ( Addr a, UInt len );
+      void (*new_mem_stack_signal)  ( Addr a, UInt len );
+      void (*new_mem_brk)    ( Addr a, UInt len );
+      void (*new_mem_mmap)   ( Addr a, UInt len, 
+                               Bool nn, Bool rr, Bool ww, Bool xx );
+
+      void (*copy_mem_heap)  ( Addr from, Addr to, UInt len );
+      void (*copy_mem_remap) ( Addr from, Addr to, UInt len );
+      void (*change_mem_mprotect) ( Addr a, UInt len,  
+                                    Bool nn, Bool rr, Bool ww, Bool xx );
+      
+      void (*ban_mem_heap)   ( Addr a, UInt len );
+      void (*ban_mem_stack)  ( Addr a, UInt len );
+
+      void (*die_mem_heap)   ( Addr a, UInt len );
+      void (*die_mem_stack)  ( Addr a, UInt len );
+      void (*die_mem_stack_aligned) ( Addr a, UInt len );
+      void (*die_mem_stack_signal)  ( Addr a, UInt len );
+      void (*die_mem_brk)    ( Addr a, UInt len );
+      void (*die_mem_munmap) ( Addr a, UInt len );
+
+      void (*bad_free)        ( ThreadState* tst, Addr a );
+      void (*mismatched_free) ( ThreadState* tst, Addr a );
+
+      void (*pre_mem_read)   ( CorePart part, ThreadState* tst,
+                               Char* s, Addr a, UInt size );
+      void (*pre_mem_read_asciiz) ( CorePart part, ThreadState* tst,
+                                    Char* s, Addr a );
+      void (*pre_mem_write)  ( CorePart part, ThreadState* tst,
+                               Char* s, Addr a, UInt size );
+      /* Not implemented yet -- have to add in lots of places, which is a
+         pain.  Won't bother unless/until there's a need. */
+      /* void (*post_mem_read)  ( ThreadState* tst, Char* s, 
+                                  Addr a, UInt size ); */
+      void (*post_mem_write) ( Addr a, UInt size );
+
+
+      /* Scheduler events */
+      void (*thread_run) ( ThreadId tid );
+
+
+      /* Mutex events */
+      void (*post_mutex_lock)   ( ThreadId tid, 
+                                  void* /*pthread_mutex_t* */ mutex );
+      void (*post_mutex_unlock) ( ThreadId tid, 
+                                  void* /*pthread_mutex_t* */ mutex );
+      
+      /* Others... threads, condition variables, etc... */
+
+      /* ... */
+   }
+   VgTrackEvents;
+
+/* Declare the struct instance */
+extern VgTrackEvents VG_(track_events);
+
+
+/* ------------------------------------------------------------------ */
+/* Template functions */
+
+/* These are the parameterised functions in the core.  The default definitions
+ * are replaced by LD_PRELOADing skin substitutes.  At the very least, a skin
+ * must define the fundamental template functions.  Depending on what needs
+ * boolean variables are set, extra templates will be used too.  For each
+ * group, the need governing its use is mentioned. */
+
+
+/* ------------------------------------------------------------------ */
+/* Fundamental template functions */
+
+/* Initialise skin.   Must do the following:
+     - initialise the 'needs' struct
+     - register any helpers called by generated code
+  
+   May do the following:
+     - indicate events to track by initialising part or all of the 'track'
+       struct
+     - register any skin-specific profiling events
+     - any other skin-specific initialisation
+*/
+extern void        SK_(pre_clo_init) ( VgNeeds* needs, VgTrackEvents* track );
+
+/* Do any initialisation that relies on the results of command line option
+   processing. */
+extern void        SK_(post_clo_init)( void );
+
+/* Instrument a basic block.  Must be a true function, ie. the same input
+   always results in the same output, because basic blocks can be
+   retranslated.  Unless you're doing something really strange...
+   'orig_addr' is the address of the first instruction in the block. */
+extern UCodeBlock* SK_(instrument)   ( UCodeBlock* cb, Addr orig_addr );
+
+/* Finish up, print out any results, etc. */
+extern void        SK_(fini)         ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).skin_errors */
+
+/* Identify if two errors are equal, or equal enough.  `res' indicates how
+   close is "close enough".  `res' should be passed on as necessary, eg. if
+   the SkinError's extra field contains an ExeContext, `res' should be
+   passed to VG_(eq_ExeContext)() if the ExeContexts are considered.  Other
+   than that, probably don't worry about it unless you have lots of very
+   similar errors occurring.
+ */
+extern Bool SK_(eq_SkinError) ( VgRes res,
+                                SkinError* e1, SkinError* e2 );
+
+/* Print error context.  The passed function pp_ExeContext() can be (and
+   probably should be) used to print the location of the error. */
+extern void SK_(pp_SkinError) ( SkinError* ec, void (*pp_ExeContext)(void) );
+
+/* Copy the ec->extra part and replace ec->extra with the new copy.  This is
+   necessary to move from a temporary stack copy to a permanent heap one.
+  
+   Then fill in any details that could be postponed until after the decision
+   whether to ignore the error (ie. details not affecting the result of
+   SK_(eq_SkinError)()).  This saves time when errors are ignored.
+  
+   Yuk.
+*/
+extern void SK_(dup_extra_and_update)(SkinError* ec);
+
+/* Return value indicates recognition.  If recognised, type goes in `skind'. */
+extern Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind );
+
+/* Read any extra info for this suppression kind.  For filling up the
+   `string' and `extra' fields in a `SkinSupp' struct if necessary. */
+extern Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, 
+                                                Int nBuf, SkinSupp *s );
+
+/* This should just check the kinds match and maybe some stuff in the
+   'extra' field if appropriate */
+extern Bool SK_(error_matches_suppression)(SkinError* ec, SkinSupp* su);
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).basic_block_discards */
+
+extern void SK_(discard_basic_block_info) ( Addr a, UInt size );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).shadow_regs */
+
+/* Valid values for general registers and EFLAGS register, for initialising
+   and updating registers when written in certain places in core. */
+extern void SK_(written_shadow_regs_values) ( UInt* gen_reg, UInt* eflags );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).command_line_options */
+
+/* Return True if option was recognised */
+extern Bool SK_(process_cmd_line_option)( Char* argv );
+
+/* Print out command line usage for skin options */
+extern Char* SK_(usage)                  ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).client_requests */
+
+extern UInt SK_(handle_client_request) ( ThreadState* tst, UInt* arg_block );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).extended_UCode */
+
+/* Used in VG_(getExtRegUsage)() */
+#  define VG_UINSTR_READS_REG(ono)              \
+   { if (mycat(u->tag,ono) == tag)              \
+        { arr[n].num     = mycat(u->val,ono);   \
+          arr[n].isWrite = False;               \
+          n++;                                  \
+        }                                       \
+   }
+#  define VG_UINSTR_WRITES_REG(ono)             \
+   {  if (mycat(u->tag,ono) == tag)             \
+         { arr[n].num     = mycat(u->val,ono);  \
+           arr[n].isWrite = True;               \
+           n++;                                 \
+         }                                      \
+   }
+
+// SSS: only ones using camel caps
+extern Int   SK_(getExtRegUsage) ( UInstr* u, Tag tag, RegUse* arr );
+extern void  SK_(emitExtUInstr)  ( UInstr* u, RRegSet regs_live_before );
+extern Bool  SK_(saneExtUInstr)  ( Bool beforeRA, Bool beforeLiveness,
+                                   UInstr* u );
+extern Char* SK_(nameExtUOpcode) ( Opcode opc );
+extern void  SK_(ppExtUInstr)    ( UInstr* u );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).syscall_wrapper */
+
+/* If either of the pre_ functions malloc() something to return, the
+ * corresponding post_ function had better free() it! 
+ */ 
+extern void* SK_( pre_syscall) ( ThreadId tid, UInt syscallno,
+                                 Bool is_blocking );
+extern void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                                 void* pre_result, Int res,
+                                 Bool is_blocking );
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).sizeof_shadow_block > 0 */
+
+extern void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).alternative_free */
+
+extern void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst );
+
+/* ---------------------------------------------------------------------
+   VG_(needs).sanity_checks */
+
+extern Bool SK_(cheap_sanity_check)     ( void );
+extern Bool SK_(expensive_sanity_check) ( void );
+
+
+#endif   /* NDEF __VG_SKIN_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                vg_skin.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/lackey/Makefile.am b/lackey/Makefile.am
index 60553dd..96911ed 100644
--- a/lackey/Makefile.am
+++ b/lackey/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/lackey/lk_main.c b/lackey/lk_main.c
new file mode 100644
index 0000000..4592cc6
--- /dev/null
+++ b/lackey/lk_main.c
@@ -0,0 +1,224 @@
+/*--------------------------------------------------------------------*/
+/*--- Simple skin for counting UInstrs, using a C helper.          ---*/
+/*---                                                  vg_lackey.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+//#define uInstr0   VG_(newUInstr0)
+//#define uLiteral  VG_(setLiteralField)
+
+/* Nb: use ULongs because the numbers can get very big */
+static ULong n_dlrr_calls   = 0;
+static ULong n_BBs          = 0;
+static ULong n_UInstrs      = 0;
+static ULong n_x86_instrs   = 0;
+static ULong n_Jccs         = 0;
+static ULong n_Jccs_untaken = 0;
+
+static void add_one_dlrr_call(void)
+{
+   n_dlrr_calls++;
+}
+
+/* See comment above SK_(instrument) for reason why n_x86_instrs is
+   incremented here. */
+static void add_one_BB(void)
+{
+   n_BBs++;
+   n_x86_instrs++;
+}
+
+static void add_one_UInstr(void)
+{
+   n_UInstrs++;
+}
+
+static void add_one_x86_instr(void)
+{
+   n_x86_instrs++;
+}
+
+static void add_one_Jcc(void)
+{
+   n_Jccs++;
+}
+
+static void add_one_Jcc_untaken(void)
+{
+   n_Jccs_untaken++;
+}
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used)
+{
+   needs->name        = "lackey";
+   needs->description = "a UInstr counter";
+
+   //VG_(register_compact_helper)((Addr) & add_one_dlrr_call);
+   VG_(register_compact_helper)((Addr) & add_one_BB);
+   VG_(register_compact_helper)((Addr) & add_one_x86_instr);
+   VG_(register_compact_helper)((Addr) & add_one_UInstr);
+   VG_(register_compact_helper)((Addr) & add_one_Jcc);
+   VG_(register_compact_helper)((Addr) & add_one_Jcc_untaken);
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+/* Note: x86 instructions are marked by an INCEIP at the end of each one,
+   except for the final one in the basic block which ends in an
+   unconditional JMP.  Sometimes the final unconditional JMP is preceded by
+   a conditional JMP (Jcc), and thus it isn't reached.  Eg:
+
+      <code a>
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      JMP ...     (will not be reached if Jcc succeeds)
+
+   If we simplemindedly added calls to add_one_x86_instr() before INCEIPs
+   and unconditional JMPs, we'd sometimes miss the final call (when a
+   preceding conditional JMP succeeds), underestimating the x86 instruction
+   count.
+
+      <code a>
+      call add_one_x86_instr()
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      call add_one_x86_instr()
+      JMP ...
+
+   Instead we add a call before each INCEIP, and also one at the start of the
+   block, but not one at the end, viz:
+
+      call add_one_x86_instr()
+
+      <code a>
+      call add_one_x86_instr()
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      JMP ...
+
+   Which gives us the right answer.  And just to avoid two C calls, we fold
+   the basic-block-beginning call in with add_one_BB().  Phew.
+*/ 
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u;
+   Char        fnname[100];
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   /* Count call to dlrr(), if this BB is dlrr()'s entry point */
+   if (VG_(get_fnname_if_entry)(orig_addr, fnname, 100) &&
+       0 == VG_(strcmp)(fnname, "_dl_runtime_resolve")) 
+   {
+      VG_(callHelper_0_0)(cb, (Addr) & add_one_dlrr_call);
+   }
+
+   /* Count basic block */
+   VG_(callHelper_0_0)(cb, (Addr) & add_one_BB);
+
+   for (i = 0; i < cb_in->used; i++) {
+      u = &cb_in->instrs[i];
+
+      switch (u->opcode) {
+         case NOP: case CALLM_S: case CALLM_E:
+            break;
+   
+         case INCEIP:
+            /* Count x86 instr */
+            VG_(callHelper_0_0)(cb, (Addr) & add_one_x86_instr);
+            VG_(copyUInstr)(cb, u);
+            break;
+
+         case JMP:
+            if (u->cond != CondAlways) {
+               /* Count Jcc */
+               VG_(callHelper_0_0)(cb, (Addr) & add_one_Jcc);
+               VG_(copyUInstr)(cb, u);
+               /* Count non-taken Jcc */
+               VG_(callHelper_0_0)(cb, (Addr) & add_one_Jcc_untaken);
+            } else {
+               VG_(copyUInstr)(cb, u);
+            }
+            break;
+            
+         default:
+            /* Count UInstr */
+            VG_(callHelper_0_0)(cb, (Addr) & add_one_UInstr);
+            VG_(copyUInstr)(cb, u);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+void SK_(fini)(void)
+{
+    VG_(message)(Vg_UserMsg,
+                 "Counted %d calls to _dl_runtime_resolve()", n_dlrr_calls);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Executed:");
+    VG_(message)(Vg_UserMsg, "  BBs:         %u", n_BBs);
+    VG_(message)(Vg_UserMsg, "  x86 instrs:  %u", n_x86_instrs);
+    VG_(message)(Vg_UserMsg, "  UInstrs:     %u", n_UInstrs);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Jccs:");
+    VG_(message)(Vg_UserMsg, "  total:       %u", n_Jccs);
+    VG_(message)(Vg_UserMsg, "  %% taken:     %u%%",
+                             (n_Jccs - n_Jccs_untaken)*100 / n_Jccs);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Ratios:");
+    VG_(message)(Vg_UserMsg, "  x86 instrs : BB        = %3u : 10",
+                             10 * n_x86_instrs / n_BBs);
+    VG_(message)(Vg_UserMsg, "     UInstrs : BB        = %3u : 10",
+                             10 * n_UInstrs / n_BBs);
+    VG_(message)(Vg_UserMsg, "     UInstrs : x86_instr = %3u : 10",
+                             10 * n_UInstrs / n_x86_instrs);
+
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              vg_lackey.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/memcheck/Makefile.am b/memcheck/Makefile.am
index 60553dd..96911ed 100644
--- a/memcheck/Makefile.am
+++ b/memcheck/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html
index b715ee3..95fe840 100644
--- a/memcheck/docs/manual.html
+++ b/memcheck/docs/manual.html
@@ -345,7 +345,7 @@
 </pre>
 
 <p>Note that Valgrind also reads options from the environment variable
-<code>$VALGRIND</code>, and processes them before the command-line
+<code>$VALGRIND_OPTS</code>, and processes them before the command-line
 options.
 
 <p>Valgrind's default settings succeed in giving reasonable behaviour
@@ -838,8 +838,8 @@
   <li>The contents of malloc'd blocks, before you write something
       there.  In C++, the new operator is a wrapper round malloc, so
       if you create an object with new, its fields will be
-      uninitialised until you fill them in, which is only Right and
-      Proper.</li>
+      uninitialised until you (or the constructor) fill them in, which
+      is only Right and Proper.</li>
 </ul>
 
 
@@ -1066,16 +1066,16 @@
       <p>
 
   <li>The "immediate location" specification.  For Value and Addr
-      errors, is either the name of the function in which the error
-      occurred, or, failing that, the full path the the .so file
-      containing the error location.  For Param errors, is the name of
-      the offending system call parameter.  For Free errors, is the
-      name of the function doing the freeing (eg, <code>free</code>,
-      <code>__builtin_vec_delete</code>, etc)</li><br>
+      errors, it is either the name of the function in which the error
+      occurred, or, failing that, the full path of the .so file or
+      executable containing the error location.  For Param errors,
+      is the name of the offending system call parameter.  For Free
+      errors, is the name of the function doing the freeing (eg,
+      <code>free</code>, <code>__builtin_vec_delete</code>, etc)</li><br>
       <p>
 
   <li>The caller of the above "immediate location".  Again, either a
-      function or shared-object name.</li><br>
+      function or shared-object/executable name.</li><br>
       <p>
 
   <li>Optionally, one or two extra calling-function or object names,
@@ -1083,8 +1083,8 @@
 </ul>
 
 <p>
-Locations may be either names of shared objects or wildcards matching
-function names.  They begin <code>obj:</code> and <code>fun:</code>
+Locations may be either names of shared objects/executables or wildcards
+matching function names.  They begin <code>obj:</code> and <code>fun:</code>
 respectively.  Function and object names to match against may use the 
 wildcard characters <code>*</code> and <code>?</code>.
 
@@ -1617,11 +1617,11 @@
 
   <li>If the new size is smaller, the dropped-off section is marked as
       unaddressible.  You may only pass to realloc a pointer
-      previously issued to you by malloc/calloc/new/realloc.</li><br>
+      previously issued to you by malloc/calloc/realloc.</li><br>
       <p>
 
   <li>free/delete: you may only pass to free a pointer previously
-      issued to you by malloc/calloc/new/realloc, or the value
+      issued to you by malloc/calloc/realloc, or the value
       NULL. Otherwise, Valgrind complains.  If the pointer is indeed
       valid, Valgrind marks the entire area it points at as
       unaddressible, and places the block in the freed-blocks-queue.
@@ -2058,7 +2058,9 @@
   <li>Run your program with <code>cachegrind</code> in front of the
       normal command line invocation.  When the program finishes,
       Valgrind will print summary cache statistics. It also collects
-      line-by-line information in a file <code>cachegrind.out</code>.
+      line-by-line information in a file
+      <code>cachegrind.out.<i>pid</i></code>, where <code><i>pid</i></code>
+      is the program's process id.
       <p>
       This step should be done every time you want to collect
       information about a new program, a changed program, or about the
@@ -2197,15 +2199,17 @@
 
 As well as printing summary information, Cachegrind also writes
 line-by-line cache profiling information to a file named
-<code>cachegrind.out</code>.  This file is human-readable, but is best
-interpreted by the accompanying program <code>vg_annotate</code>,
+<code>cachegrind.out.<i>pid</i></code>.  This file is human-readable, but is
+best interpreted by the accompanying program <code>vg_annotate</code>,
 described in the next section.
 <p>
-Things to note about the <code>cachegrind.out</code> file:
+Things to note about the <code>cachegrind.out.<i>pid</i></code> file:
 <ul>
   <li>It is written every time <code>valgrind --cachesim=yes</code> or
       <code>cachegrind</code> is run, and will overwrite any existing
-      <code>cachegrind.out</code> in the current directory.</li>
+      <code>cachegrind.out.<i>pid</i></code> in the current directory (but
+      that won't happen very often because it takes some time for process ids
+      to be recycled).</li>
   <p>
   <li>It can be huge: <code>ls -l</code> generates a file of about
       350KB.  Browsing a few files and web pages with a Konqueror
@@ -2213,6 +2217,13 @@
       of around 15 MB.</li>
 </ul>
 
+Note that older versions of Cachegrind used a log file named
+<code>cachegrind.out</code> (i.e. no <code><i>.pid</i></code> suffix).
+The suffix serves two purposes.  Firstly, it means you don't have to rename old
+log files that you don't want to overwrite.  Secondly, and more importantly,
+it allows correct profiling with the <code>--trace-children=yes</code> option
+of programs that spawn child processes.
+
 <a name="profileflags"></a>
 <h3>7.5&nbsp; Cachegrind options</h3>
 Cachegrind accepts all the options that Valgrind does, although some of them
@@ -2245,9 +2256,13 @@
 window to be at least 120-characters wide if possible, as the output
 lines can be quite long.
 <p>
-To get a function-by-function summary, run <code>vg_annotate</code> in
-directory containing a <code>cachegrind.out</code> file.  The output
-looks like this:
+To get a function-by-function summary, run <code>vg_annotate
+--<i>pid</i></code> in a directory containing a
+<code>cachegrind.out.<i>pid</i></code> file.  The <code>--<i>pid</i></code>
+is required so that <code>vg_annotate</code> knows which log file to use when
+several are present.
+<p>
+The output looks like this:
 
 <pre>
 --------------------------------------------------------------------------------
@@ -2468,8 +2483,9 @@
 specific enough.
 
 Beware that vg_annotate can take some time to digest large
-<code>cachegrind.out</code> files, eg. 30 seconds or more.  Also beware that
-auto-annotation can produce a lot of output if your program is large!
+<code>cachegrind.out.<i>pid</i></code> files, e.g. 30 seconds or more.  Also
+beware that auto-annotation can produce a lot of output if your program is
+large!
 
 
 <h3>7.7&nbsp; Annotating assembler programs</h3>
@@ -2492,13 +2508,18 @@
 
 <h3>7.8&nbsp; <code>vg_annotate</code> options</h3>
 <ul>
+  <li><code>--<i>pid</i></code></li><p>
+
+      Indicates which <code>cachegrind.out.<i>pid</i></code> file to read.
+      Not actually an option -- it is required.
+    
   <li><code>-h, --help</code></li><p>
   <li><code>-v, --version</code><p>
 
       Help and version, as usual.</li>
 
   <li><code>--sort=A,B,C</code> [default: order in 
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies the events upon which the sorting of the function-by-function
       entries will be based.  Useful if you want to concentrate on eg. I cache
       misses (<code>--sort=I1mr,I2mr</code>), or D cache misses
@@ -2506,10 +2527,10 @@
       (<code>--sort=D2mr,I2mr</code>).</li><p>
 
   <li><code>--show=A,B,C</code> [default: all, using order in
-      <code>cachegrind.out</code>]<p>
+      <code>cachegrind.out.<i>pid</i></code>]<p>
       Specifies which events to show (and the column order). Default is to use
-      all present in the <code>cachegrind.out</code> file (and use the order in
-      the file).</li><p>
+      all present in the <code>cachegrind.out.<i>pid</i></code> file (and use
+      the order in the file).</li><p>
 
   <li><code>--threshold=X</code> [default: 99%] <p>
       Sets the threshold for the function-by-function summary.  Functions are
@@ -2547,17 +2568,18 @@
 There are a couple of situations in which vg_annotate issues warnings.
 
 <ul>
-  <li>If a source file is more recent than the <code>cachegrind.out</code>
-      file.  This is because the information in <code>cachegrind.out</code> is
-      only recorded with line numbers, so if the line numbers change at all in
-      the source (eg. lines added, deleted, swapped), any annotations will be 
+  <li>If a source file is more recent than the
+      <code>cachegrind.out.<i>pid</i></code> file.  This is because the
+      information in <code>cachegrind.out.<i>pid</i></code> is only recorded
+      with line numbers, so if the line numbers change at all in the source
+      (eg.  lines added, deleted, swapped), any annotations will be
       incorrect.<p>
 
   <li>If information is recorded about line numbers past the end of a file.
       This can be caused by the above problem, ie. shortening the source file
-      while using an old <code>cachegrind.out</code> file.  If this happens,
-      the figures for the bogus lines are printed anyway (clearly marked as
-      bogus) in case they are important.</li><p>
+      while using an old <code>cachegrind.out.<i>pid</i></code> file.  If this
+      happens, the figures for the bogus lines are printed anyway (clearly
+      marked as bogus) in case they are important.</li><p>
 </ul>
 
 
@@ -2677,6 +2699,13 @@
       <blockquote><code>btsl %eax, %edx</code></blockquote>
 
       This should only happen rarely.
+      </li><p>
+
+  <li>FPU instructions with data sizes of 28 and 108 bytes (e.g.
+      <code>fsave</code>) are treated as though they only access 16 bytes.
+      These instructions seem to be rare so hopefully this won't affect
+      accuracy much.
+      </li><p>
 </ul>
 
 Another thing worth nothing is that results are very sensitive.  Changing the
diff --git a/memcheck/mc_clientreqs.c b/memcheck/mc_clientreqs.c
new file mode 100644
index 0000000..b5284bd
--- /dev/null
+++ b/memcheck/mc_clientreqs.c
@@ -0,0 +1,367 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: for when the client advises       ---*/
+/*--- Valgrind about memory permissions.                           ---*/
+/*---                                     vg_memcheck_clientreqs.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+#include "vg_memcheck.h"  /* for VG_USERREQ__* */
+
+
+/*------------------------------------------------------------*/
+/*--- General client block management.                     ---*/
+/*------------------------------------------------------------*/
+
+/* This is managed as an expanding array of client block descriptors.
+   Indices of live descriptors are issued to the client, so it can ask
+   to free them later.  Therefore we cannot slide live entries down
+   over dead ones.  Instead we must use free/inuse flags and scan for
+   an empty slot at allocation time.  This in turn means allocation is
+   relatively expensive, so we hope this does not happen too often. 
+*/
+
+typedef
+   enum { CG_NotInUse, CG_NoAccess, CG_Writable, CG_Readable }
+   CGenBlockKind;
+
+typedef
+   struct {
+      Addr          start;
+      UInt          size;
+      ExeContext*   where;
+      CGenBlockKind kind;
+   } 
+   CGenBlock;
+
+/* This subsystem is self-initialising. */
+static UInt       vg_cgb_size = 0;
+static UInt       vg_cgb_used = 0;
+static CGenBlock* vg_cgbs     = NULL;
+
+/* Stats for this subsystem. */
+static UInt vg_cgb_used_MAX = 0;   /* Max in use. */
+static UInt vg_cgb_allocs   = 0;   /* Number of allocs. */
+static UInt vg_cgb_discards = 0;   /* Number of discards. */
+static UInt vg_cgb_search   = 0;   /* Number of searches. */
+
+
+static
+Int vg_alloc_client_block ( void )
+{
+   Int        i, sz_new;
+   CGenBlock* cgbs_new;
+
+   vg_cgb_allocs++;
+
+   for (i = 0; i < vg_cgb_used; i++) {
+      vg_cgb_search++;
+      if (vg_cgbs[i].kind == CG_NotInUse)
+         return i;
+   }
+
+   /* Not found.  Try to allocate one at the end. */
+   if (vg_cgb_used < vg_cgb_size) {
+      vg_cgb_used++;
+      return vg_cgb_used-1;
+   }
+
+   /* Ok, we have to allocate a new one. */
+   vg_assert(vg_cgb_used == vg_cgb_size);
+   sz_new = (vg_cgbs == NULL) ? 10 : (2 * vg_cgb_size);
+
+   cgbs_new = VG_(malloc)( sz_new * sizeof(CGenBlock) );
+   for (i = 0; i < vg_cgb_used; i++) 
+      cgbs_new[i] = vg_cgbs[i];
+
+   if (vg_cgbs != NULL)
+      VG_(free)( vg_cgbs );
+   vg_cgbs = cgbs_new;
+
+   vg_cgb_size = sz_new;
+   vg_cgb_used++;
+   if (vg_cgb_used > vg_cgb_used_MAX)
+      vg_cgb_used_MAX = vg_cgb_used;
+   return vg_cgb_used-1;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Stack block management.                              ---*/
+/*------------------------------------------------------------*/
+
+/* This is managed as an expanding array of CStackBlocks.  They are
+   packed up against the left-hand end of the array, with no holes.
+   They are kept sorted by the start field, with the [0] having the
+   highest value.  This means it's pretty cheap to put new blocks at
+   the end, corresponding to stack pushes, since the additions put
+   blocks on in what is presumably fairly close to strictly descending
+   order.  If this assumption doesn't hold the performance
+   consequences will be horrible.
+
+   When the client's %ESP jumps back upwards as the result of a RET
+   insn, we shrink the array backwards from the end, in a
+   guaranteed-cheap linear scan.  
+*/
+
+typedef
+   struct {
+      Addr        start;
+      UInt        size;
+      ExeContext* where;
+   } 
+   CStackBlock;
+
+/* This subsystem is self-initialising. */
+static UInt         vg_csb_size = 0;
+static UInt         vg_csb_used = 0;
+static CStackBlock* vg_csbs     = NULL;
+
+/* Stats for this subsystem. */
+static UInt vg_csb_used_MAX = 0;   /* Max in use. */
+static UInt vg_csb_allocs   = 0;   /* Number of allocs. */
+static UInt vg_csb_discards = 0;   /* Number of discards. */
+static UInt vg_csb_swaps    = 0;   /* Number of swaps. */
+
+static
+void vg_add_client_stack_block ( ThreadState* tst, Addr aa, UInt sz )
+{
+   UInt i, sz_new;
+   CStackBlock* csbs_new;
+   vg_csb_allocs++;
+
+   /* Ensure there is space for a new block. */
+
+   if (vg_csb_used >= vg_csb_size) {
+
+      /* No; we have to expand the array. */
+      vg_assert(vg_csb_used == vg_csb_size);
+
+      sz_new = (vg_csbs == NULL) ? 10 : (2 * vg_csb_size);
+
+      csbs_new = VG_(malloc)( sz_new * sizeof(CStackBlock) );
+      for (i = 0; i < vg_csb_used; i++) 
+        csbs_new[i] = vg_csbs[i];
+
+      if (vg_csbs != NULL)
+         VG_(free)( vg_csbs );
+      vg_csbs = csbs_new;
+
+      vg_csb_size = sz_new;
+   }
+
+   /* Ok, we can use [vg_csb_used]. */
+   vg_csbs[vg_csb_used].start = aa;
+   vg_csbs[vg_csb_used].size  = sz;
+   /* Actually running a thread at this point. */
+   vg_csbs[vg_csb_used].where = VG_(get_ExeContext) ( tst );
+   vg_csb_used++;
+
+   if (vg_csb_used > vg_csb_used_MAX)
+      vg_csb_used_MAX = vg_csb_used;
+
+   vg_assert(vg_csb_used <= vg_csb_size);
+
+   /* VG_(printf)("acsb  %p %d\n", aa, sz); */
+   SK_(make_noaccess) ( aa, sz );
+
+   /* And make sure that they are in descending order of address. */
+   i = vg_csb_used;
+   while (i > 0 && vg_csbs[i-1].start < vg_csbs[i].start) {
+      CStackBlock tmp = vg_csbs[i-1];
+      vg_csbs[i-1] = vg_csbs[i];
+      vg_csbs[i] = tmp;
+      vg_csb_swaps++;
+   }
+
+#  if 1
+   for (i = 1; i < vg_csb_used; i++)
+      vg_assert(vg_csbs[i-1].start >= vg_csbs[i].start);
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Externally visible functions.                        ---*/
+/*------------------------------------------------------------*/
+
+void SK_(show_client_block_stats) ( void )
+{
+   VG_(message)(Vg_DebugMsg, 
+      "general CBs: %d allocs, %d discards, %d maxinuse, %d search",
+      vg_cgb_allocs, vg_cgb_discards, vg_cgb_used_MAX, vg_cgb_search 
+   );
+   VG_(message)(Vg_DebugMsg, 
+      "  stack CBs: %d allocs, %d discards, %d maxinuse, %d swap",
+      vg_csb_allocs, vg_csb_discards, vg_csb_used_MAX, vg_csb_swaps
+   );
+}
+
+Bool SK_(client_perm_maybe_describe)( Addr a, AddrInfo* ai )
+{
+   Int i;
+   /* VG_(printf)("try to identify %d\n", a); */
+
+   /* First see if it's a stack block.  We do two passes, one exact
+      and one with a bit of slop, so as to try and get the most
+      accurate fix. */
+   for (i = 0; i < vg_csb_used; i++) {
+      if (vg_csbs[i].start <= a
+          && a < vg_csbs[i].start + vg_csbs[i].size) {
+         ai->akind = UserS;
+         ai->blksize = vg_csbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
+         ai->lastchange = vg_csbs[i].where;
+         return True;
+      }
+   }
+
+   /* No exact match on the stack.  Re-do the stack scan with a bit of
+      slop. */
+   for (i = 0; i < vg_csb_used; i++) {
+      if (vg_csbs[i].start - 8 <= a
+          && a < vg_csbs[i].start + vg_csbs[i].size + 8) {
+         ai->akind = UserS;
+         ai->blksize = vg_csbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
+         ai->lastchange = vg_csbs[i].where;
+         return True;
+      }
+   }
+
+   /* No match on the stack.  Perhaps it's a general block ? */
+   for (i = 0; i < vg_cgb_used; i++) {
+      if (vg_cgbs[i].kind == CG_NotInUse) 
+         continue;
+      if (VG_(addr_is_in_block)(a, vg_cgbs[i].start, vg_cgbs[i].size)) {
+         ai->akind = UserG;
+         ai->blksize = vg_cgbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_cgbs[i].start);
+         ai->lastchange = vg_cgbs[i].where;
+         return True;
+      }
+   }
+   return False;
+}
+
+
+void SK_(delete_client_stack_blocks_following_ESP_change) ( void )
+{
+   Addr newESP = VG_(get_stack_pointer)();
+
+   while (vg_csb_used > 0 
+          && vg_csbs[vg_csb_used-1].start + vg_csbs[vg_csb_used-1].size 
+             <= newESP) {
+      vg_csb_used--;
+      vg_csb_discards++;
+      if (VG_(clo_verbosity) > 2)
+         VG_(printf)("discarding stack block %p for %d\n", 
+            (void*)vg_csbs[vg_csb_used].start, 
+            vg_csbs[vg_csb_used].size);
+   }
+}
+
+
+UInt SK_(handle_client_request) ( ThreadState* tst, UInt* arg_block )
+{
+   Int   i;
+   Bool  ok;
+   Addr  bad_addr;
+   UInt* arg = arg_block;
+
+   switch (arg[0]) {
+      case VG_USERREQ__CHECK_WRITABLE: /* check writable */
+         ok = SK_(check_writable) ( arg[1], arg[2], &bad_addr );
+         if (!ok)
+            SK_(record_user_error) ( tst, bad_addr, True );
+         return ok ? (UInt)NULL : bad_addr;
+
+      case VG_USERREQ__CHECK_READABLE: /* check readable */
+         ok = SK_(check_readable) ( arg[1], arg[2], &bad_addr );
+         if (!ok)
+            SK_(record_user_error) ( tst, bad_addr, False );
+         return ok ? (UInt)NULL : bad_addr;
+
+      case VG_USERREQ__DO_LEAK_CHECK:
+         SK_(detect_memory_leaks)();
+         return 0; /* return value is meaningless */
+
+      case VG_USERREQ__MAKE_NOACCESS: /* make no access */
+         i = vg_alloc_client_block();
+         /* VG_(printf)("allocated %d %p\n", i, vg_cgbs); */
+         vg_cgbs[i].kind  = CG_NoAccess;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_noaccess) ( arg[1], arg[2] );
+         return i;
+
+      case VG_USERREQ__MAKE_WRITABLE: /* make writable */
+         i = vg_alloc_client_block();
+         vg_cgbs[i].kind  = CG_Writable;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_writable) ( arg[1], arg[2] );
+         return i;
+
+      case VG_USERREQ__MAKE_READABLE: /* make readable */
+         i = vg_alloc_client_block();
+         vg_cgbs[i].kind  = CG_Readable;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_readable) ( arg[1], arg[2] );
+         return i;
+         
+      case VG_USERREQ__DISCARD: /* discard */
+         if (vg_cgbs == NULL 
+             || arg[2] >= vg_cgb_used || vg_cgbs[arg[2]].kind == CG_NotInUse)
+            return 1;
+         vg_assert(arg[2] >= 0 && arg[2] < vg_cgb_used);
+         vg_cgbs[arg[2]].kind = CG_NotInUse;
+         vg_cgb_discards++;
+         return 0;
+
+      case VG_USERREQ__MAKE_NOACCESS_STACK: /* make noaccess stack block */
+         vg_add_client_stack_block ( tst, arg[1], arg[2] );
+         return 0;
+
+      default:
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: unknown memcheck client request code %d",
+                      arg[0]);
+         return 1;
+   }
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_clientreqs.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_errcontext.c b/memcheck/mc_errcontext.c
new file mode 100644
index 0000000..81f420c
--- /dev/null
+++ b/memcheck/mc_errcontext.c
@@ -0,0 +1,610 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: management of memory error        ---*/
+/*--- messages.                                                    ---*/
+/*---                                     vg_memcheck_errcontext.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/*------------------------------------------------------------*/
+/*--- Defns                                                ---*/
+/*------------------------------------------------------------*/
+
+/* This many bytes below %ESP are considered addressable if we're
+   doing the --workaround-gcc296-bugs hack. */
+#define VG_GCC296_BUG_STACK_SLOP 1024
+
+
+typedef 
+   enum { 
+      /* Bad syscall params */
+      ParamSupp,
+      /* Memory errors in core (pthread ops, signal handling) */
+      CoreMemSupp,
+      /* Use of invalid values of given size */
+      Value0Supp, Value1Supp, Value2Supp, Value4Supp, Value8Supp, 
+      /* Invalid read/write attempt at given size */
+      Addr1Supp, Addr2Supp, Addr4Supp, Addr8Supp,
+      /* Invalid or mismatching free */
+      FreeSupp
+   } 
+   MemCheckSuppKind;
+
+/* What kind of error it is. */
+typedef 
+   enum { ValueErr,
+          CoreMemErr,
+          AddrErr, 
+          ParamErr, UserErr,  /* behaves like an anonymous ParamErr */
+          FreeErr, FreeMismatchErr
+   }
+   MemCheckErrorKind;
+
+/* What kind of memory access is involved in the error? */
+typedef
+   enum { ReadAxs, WriteAxs, ExecAxs }
+   AxsKind;
+
+/* Extra context for memory errors */
+typedef
+   struct {
+      /* AddrErr */
+      AxsKind axskind;
+      /* AddrErr, ValueErr */
+      Int size;
+      /* AddrErr, FreeErr, FreeMismatchErr, ParamErr, UserErr */
+      AddrInfo addrinfo;
+      /* ParamErr, UserErr, CoreMemErr */
+      Bool isWrite;
+   }
+   MemCheckError;
+
+/*------------------------------------------------------------*/
+/*--- Comparing and printing errors                        ---*/
+/*------------------------------------------------------------*/
+
+static __inline__
+void clear_AddrInfo ( AddrInfo* ai )
+{
+   ai->akind      = Unknown;
+   ai->blksize    = 0;
+   ai->rwoffset   = 0;
+   ai->lastchange = NULL;
+   ai->stack_tid  = VG_INVALID_THREADID;
+   ai->maybe_gcc  = False;
+}
+
+static __inline__
+void clear_MemCheckError ( MemCheckError* err_extra )
+{
+   err_extra->axskind   = ReadAxs;
+   err_extra->size      = 0;
+   clear_AddrInfo ( &err_extra->addrinfo );
+   err_extra->isWrite   = False;
+}
+
+__attribute__ ((unused))
+static Bool eq_AddrInfo ( VgRes res, AddrInfo* ai1, AddrInfo* ai2 )
+{
+   if (ai1->akind != Undescribed 
+       && ai2->akind != Undescribed
+       && ai1->akind != ai2->akind) 
+      return False;
+   if (ai1->akind == Freed || ai1->akind == Mallocd) {
+      if (ai1->blksize != ai2->blksize)
+         return False;
+      if (!VG_(eq_ExeContext)(res, ai1->lastchange, ai2->lastchange))
+         return False;
+   }
+   return True;
+}
+
+/* Compare error contexts, to detect duplicates.  Note that if they
+   are otherwise the same, the faulting addrs and associated rwoffsets
+   are allowed to be different.  */
+
+Bool SK_(eq_SkinError) ( VgRes res,
+                          SkinError* e1, SkinError* e2 )
+{
+   MemCheckError* e1_extra = e1->extra;
+   MemCheckError* e2_extra = e2->extra;
+   
+   switch (e1->ekind) {
+      case CoreMemErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)   return False;
+         if (e2->ekind != CoreMemErr)                  return False; 
+         if (e1->string == e2->string)                 return True;
+         if (0 == VG_(strcmp)(e1->string, e2->string)) return True;
+         return False;
+
+      case UserErr:
+      case ParamErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)
+            return False;
+         if (e1->ekind == ParamErr 
+             && 0 != VG_(strcmp)(e1->string, e2->string))
+            return False;
+         return True;
+
+      case FreeErr:
+      case FreeMismatchErr:
+         /* JRS 2002-Aug-26: comparing addrs seems overkill and can
+            cause excessive duplication of errors.  Not even AddrErr
+            below does that.  So don't compare either the .addr field
+            or the .addrinfo fields. */
+         /* if (e1->addr != e2->addr) return False; */
+         /* if (!eq_AddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+               return False;
+         */
+         return True;
+
+      case AddrErr:
+         /* if (e1_extra->axskind != e2_extra->axskind) return False; */
+         if (e1_extra->size != e2_extra->size) return False;
+         /*
+         if (!eq_AddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+            return False;
+         */
+         return True;
+
+      case ValueErr:
+         if (e1_extra->size != e2_extra->size) return False;
+         return True;
+
+      default: 
+         VG_(printf)("Error:\n  unknown MemCheck error code %d\n", e1->ekind);
+         VG_(panic)("unknown error code in SK_(eq_SkinError)");
+   }
+}
+
+static void pp_AddrInfo ( Addr a, AddrInfo* ai )
+{
+   switch (ai->akind) {
+      case Stack: 
+         VG_(message)(Vg_UserMsg, 
+                      "   Address 0x%x is on thread %d's stack", 
+                      a, ai->stack_tid);
+         break;
+      case Unknown:
+         if (ai->maybe_gcc) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
+               a);
+            VG_(message)(Vg_UserMsg, 
+               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
+         }
+         break;
+      case Freed: case Mallocd: case UserG: case UserS: {
+         UInt delta;
+         UChar* relative;
+         if (ai->rwoffset < 0) {
+            delta    = (UInt)(- ai->rwoffset);
+            relative = "before";
+         } else if (ai->rwoffset >= ai->blksize) {
+            delta    = ai->rwoffset - ai->blksize;
+            relative = "after";
+         } else {
+            delta    = ai->rwoffset;
+            relative = "inside";
+         }
+         if (ai->akind == UserS) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a %d-byte stack red-zone created",
+               a, delta, relative, 
+               ai->blksize );
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a block of size %d %s",
+               a, delta, relative, 
+               ai->blksize,
+               ai->akind==Mallocd ? "alloc'd" 
+                  : ai->akind==Freed ? "free'd" 
+                                     : "client-defined");
+         }
+         VG_(pp_ExeContext)(ai->lastchange);
+         break;
+      }
+      default:
+         VG_(panic)("pp_AddrInfo");
+   }
+}
+
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   MemCheckError* err_extra = err->extra;
+
+   switch (err->ekind) {
+      case CoreMemErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "%s contains uninitialised or unaddressable byte(s)",
+                err->string);
+         }
+         pp_ExeContext();
+         break;
+      
+      case ValueErr:
+         if (err_extra->size == 0) {
+             VG_(message)(
+                Vg_UserMsg,
+                "Conditional jump or move depends on uninitialised value(s)");
+         } else {
+             VG_(message)(Vg_UserMsg,
+                          "Use of uninitialised value of size %d",
+                          err_extra->size);
+         }
+         pp_ExeContext();
+         break;
+
+      case AddrErr:
+         switch (err_extra->axskind) {
+            case ReadAxs:
+               VG_(message)(Vg_UserMsg, "Invalid read of size %d", 
+                                        err_extra->size ); 
+               break;
+            case WriteAxs:
+               VG_(message)(Vg_UserMsg, "Invalid write of size %d", 
+                                        err_extra->size ); 
+               break;
+            case ExecAxs:
+               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
+                                        "stated on the next line");
+               break;
+            default: 
+               VG_(panic)("pp_SkinError(axskind)");
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case FreeErr:
+         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
+         /* fall through */
+      case FreeMismatchErr:
+         if (err->ekind == FreeMismatchErr)
+            VG_(message)(Vg_UserMsg, 
+                         "Mismatched free() / delete / delete []");
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case ParamErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Syscall param %s contains unaddressable byte(s)",
+                err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "Syscall param %s contains uninitialised or "
+                "unaddressable byte(s)",
+            err->string);
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case UserErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Unaddressable byte(s) found during client check request");
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "Uninitialised or "
+               "unaddressable byte(s) found during client check request");
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      default: 
+         VG_(printf)("Error:\n  unknown MemCheck error code %d\n", err->ekind);
+         VG_(panic)("unknown error code in SK_(pp_SkinError)");
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Recording errors                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Describe an address as best you can, for error messages,
+   putting the result in ai. */
+
+static void describe_addr ( Addr a, AddrInfo* ai )
+{
+   ShadowChunk* sc;
+   Bool         ok;
+   ThreadId     tid;
+
+   /* Nested functions, yeah.  Need the lexical scoping of 'a'. */ 
+
+   /* Closure for searching thread stacks */
+   Bool addr_is_in_bounds(Addr stack_min, Addr stack_max)
+   {
+      return (stack_min <= a && a <= stack_max);
+   }
+   /* Closure for searching malloc'd and free'd lists */
+   Bool addr_is_in_block(ShadowChunk *sh_ch)
+   {
+      return VG_(addr_is_in_block) ( a, sh_ch->data, sh_ch->size );
+   }
+
+   /* Perhaps it's a user-def'd block ? */
+   ok = SK_(client_perm_maybe_describe)( a, ai );
+   if (ok)
+      return;
+   /* Perhaps it's on a thread's stack? */
+   tid = VG_(any_matching_thread_stack)(addr_is_in_bounds);
+   if (tid != VG_INVALID_THREADID) {
+      ai->akind     = Stack;
+      ai->stack_tid = tid;
+      return;
+   }
+   /* Search for a recently freed block which might bracket it. */
+   sc = SK_(any_matching_freed_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Freed;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   }
+   /* Search for a currently malloc'd block which might bracket it. */
+   sc = VG_(any_matching_mallocd_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Mallocd;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   } 
+   /* Clueless ... */
+   ai->akind = Unknown;
+   return;
+}
+
+
+/* Creates a copy of the err_extra, updates the copy with address info if
+   necessary, sticks the copy into the SkinError. */
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   MemCheckError* err_extra;
+
+   err_extra  = VG_(malloc)(sizeof(MemCheckError));
+   *err_extra = *((MemCheckError*)err->extra);
+
+   if (err_extra->addrinfo.akind == Undescribed)
+      describe_addr ( err->addr, &(err_extra->addrinfo) );
+
+   err->extra = err_extra;
+}
+
+/* These two are called from generated code. */
+void SK_(record_value_error) ( Int size )
+{
+   MemCheckError err_extra;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.size = size;
+   VG_(maybe_record_error)( NULL, ValueErr, /*addr*/0, /*s*/NULL, &err_extra );
+}
+
+/* Is this address within some small distance below %ESP?  Used only
+   for the --workaround-gcc296-bugs kludge. */
+Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
+{
+   if ((UInt)esp > (UInt)aa
+       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
+      return True;
+   else
+      return False;
+}
+
+void SK_(record_address_error) ( Addr a, Int size, Bool isWrite )
+{
+   MemCheckError err_extra;
+   Bool          just_below_esp;
+
+   just_below_esp 
+      = VG_(is_just_below_ESP)( VG_(get_stack_pointer)(), a );
+
+   /* If this is caused by an access immediately below %ESP, and the
+      user asks nicely, we just ignore it. */
+   if (SK_(clo_workaround_gcc296_bugs) && just_below_esp)
+      return;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.axskind = isWrite ? WriteAxs : ReadAxs;
+   err_extra.size    = size;
+   err_extra.addrinfo.akind     = Undescribed;
+   err_extra.addrinfo.maybe_gcc = just_below_esp;
+   VG_(maybe_record_error)( NULL, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+/* These ones are called from non-generated code */
+
+/* This is for memory errors in pthread functions, as opposed to pthread API
+   errors which are found by the core. */
+void SK_(record_core_mem_error) ( ThreadState* tst, Bool isWrite, Char* msg )
+{
+   MemCheckError err_extra;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, CoreMemErr, /*addr*/0, msg, &err_extra );
+}
+
+void SK_(record_param_error) ( ThreadState* tst, Addr a, Bool isWrite, 
+                               Char* msg )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, ParamErr, a, msg, &err_extra );
+}
+
+void SK_(record_jump_error) ( ThreadState* tst, Addr a )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.axskind = ExecAxs;
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_free_error) ( ThreadState* tst, Addr a ) 
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_freemismatch_error) ( ThreadState* tst, Addr a )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeMismatchErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_user_error) ( ThreadState* tst, Addr a, Bool isWrite )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite        = isWrite;
+   VG_(maybe_record_error)( tst, UserErr, a, /*s*/NULL, &err_extra );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Suppressions                                         ---*/
+/*------------------------------------------------------------*/
+
+#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
+                      && VG_(strcmp)((s1),(s2))==0)
+
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if      (STREQ(name, "Param"))   *skind = ParamSupp;
+   else if (STREQ(name, "CoreMem")) *skind = CoreMemSupp;
+   else if (STREQ(name, "Value0"))  *skind = Value0Supp; /* backwards compat */ 
+   else if (STREQ(name, "Cond"))    *skind = Value0Supp;
+   else if (STREQ(name, "Value1"))  *skind = Value1Supp;
+   else if (STREQ(name, "Value2"))  *skind = Value2Supp;
+   else if (STREQ(name, "Value4"))  *skind = Value4Supp;
+   else if (STREQ(name, "Value8"))  *skind = Value8Supp;
+   else if (STREQ(name, "Addr1"))   *skind = Addr1Supp;
+   else if (STREQ(name, "Addr2"))   *skind = Addr2Supp;
+   else if (STREQ(name, "Addr4"))   *skind = Addr4Supp;
+   else if (STREQ(name, "Addr8"))   *skind = Addr8Supp;
+   else if (STREQ(name, "Free"))    *skind = FreeSupp;
+   else 
+      return False;
+
+   return True;
+}
+
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, 
+                                         SkinSupp *s )
+{
+   Bool eof;
+
+   if (s->skind == ParamSupp) {
+      eof = VG_(getLine) ( fd, buf, nBuf );
+      if (eof) return False;
+      s->string = VG_(strdup)(buf);
+   }
+   return True;
+}
+
+extern Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   UInt su_size;
+   MemCheckError* err_extra = err->extra;
+
+   switch (su->skind) {
+      case ParamSupp:
+         return (err->ekind == ParamErr && STREQ(su->string, err->string));
+
+      case CoreMemSupp:
+         return (err->ekind == CoreMemErr && STREQ(su->string, err->string));
+
+      case Value0Supp: su_size = 0; goto value_case;
+      case Value1Supp: su_size = 1; goto value_case;
+      case Value2Supp: su_size = 2; goto value_case;
+      case Value4Supp: su_size = 4; goto value_case;
+      case Value8Supp: su_size = 8; goto value_case;
+      value_case:
+         return (err->ekind == ValueErr && err_extra->size == su_size);
+
+      case Addr1Supp: su_size = 1; goto addr_case;
+      case Addr2Supp: su_size = 2; goto addr_case;
+      case Addr4Supp: su_size = 4; goto addr_case;
+      case Addr8Supp: su_size = 8; goto addr_case;
+      addr_case:
+         /* match on equal size, mirroring value_case above */
+         return (err->ekind == AddrErr && err_extra->size == su_size);
+
+      case FreeSupp:
+         return (err->ekind == FreeErr || err->ekind == FreeMismatchErr);
+
+      default:
+         VG_(printf)("Error:\n"
+                     "  unknown MemCheck suppression type %d\n", su->skind);
+         VG_(panic)("unknown suppression type in "
+                    "SK_(error_matches_suppression)");
+   }
+}
+
+#  undef STREQ
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_errcontext.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_from_ucode.c b/memcheck/mc_from_ucode.c
new file mode 100644
index 0000000..82550b7
--- /dev/null
+++ b/memcheck/mc_from_ucode.c
@@ -0,0 +1,642 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: Generate code for skin-specific   ---*/
+/*--- UInstrs.                                                     ---*/
+/*---                                     vg_memcheck_from_ucode.c ---*/
+/*--------------------------------------------------------------------*/
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/*------------------------------------------------------------*/
+/*--- Renamings of frequently-used global functions.       ---*/
+/*------------------------------------------------------------*/
+
+#define nameIReg  VG_(nameOfIntReg)
+#define nameISize VG_(nameOfIntSize)
+
+#define dis       VG_(print_codegen)
+
+/*------------------------------------------------------------*/
+/*--- Instruction emission -- turning final uinstrs back   ---*/
+/*--- into x86 code.                                       ---*/
+/*------------------------------------------------------------*/
+
+/* See the corresponding comment at the top of vg_from_ucode.c to find out
+ * how all this works */
+
+/*----------------------------------------------------*/
+/*--- v-size (4, or 2 with OSO) insn emitters      ---*/
+/*----------------------------------------------------*/
+
+static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg )
+{
+   VG_(newEmit)();
+   if (sz == 2) {
+      VG_(emitB) ( 0x66 );
+   } else {
+      vg_assert(sz == 4);
+   }
+   VG_(emitB) ( 0xF7 ); /* Grp3 Ev */
+   VG_(emit_amode_ereg_greg) ( reg, 0 /* Grp3 subopcode for TEST */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
+   if (dis)
+      VG_(printf)("\n\t\ttest%c $0x%x, %s\n", nameISize(sz), 
+                                            lit, nameIReg(sz,reg));
+}
+
+static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg )
+{
+   VG_(newEmit)();
+   if (sz == 2) {
+      VG_(emitB) ( 0x66 );
+   } else {
+      vg_assert(sz == 4);
+   }
+   VG_(emitB) ( 0xF7 ); /* Grp3 Ev */
+   VG_(emit_amode_offregmem_reg) ( off, reg, 0 /* Grp3 subopcode for TEST */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
+   if (dis)
+      VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", 
+                  nameISize(sz), lit, off, nameIReg(4,reg) );
+}
+
+/*----------------------------------------------------*/
+/*--- Instruction synthesisers                     ---*/
+/*----------------------------------------------------*/
+
+/* Synthesise a minimal test (which discards the result) of reg32
+   against lit.  It's always safe to simply
+      emit_testv_lit_reg ( 4, lit, reg32 )
+   but we try to do better when possible.
+*/
+static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 )
+{
+   if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) {
+      /* We can get away with a byte insn. */
+      VG_(emit_testb_lit_reg) ( lit, reg32 );
+   }
+   else 
+   if ((lit & 0xFFFF0000) == 0) {
+      /* Literal fits in 16 bits; do a word insn. */
+      emit_testv_lit_reg ( 2, lit, reg32 );
+   }
+   else {
+      /* Totally general ... */
+      emit_testv_lit_reg ( 4, lit, reg32 );
+   }
+}
+
+/*----------------------------------------------------*/
+/*--- Top level of the uinstr -> x86 translation.  ---*/
+/*----------------------------------------------------*/
+
+static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg,
+                          RRegSet regs_live_before,
+                          RRegSet regs_live_after )
+{
+   Addr helper;
+   UInt argv[] = { a_reg };
+   UInt tagv[] = { RealReg };
+
+   switch (sz) {
+      case 4: helper = (Addr) & SK_(helperc_LOADV4); break;
+      case 2: helper = (Addr) & SK_(helperc_LOADV2); break;
+      case 1: helper = (Addr) & SK_(helperc_LOADV1); break;
+      default: VG_(panic)("synth_LOADV");
+   }
+   VG_(synth_ccall) ( helper, 1, 1, argv, tagv, tv_reg,
+                      regs_live_before, regs_live_after );
+}
+
+
+static void synth_STOREV ( Int sz, Int tv_tag, Int tv_val, Int a_reg,
+                           RRegSet regs_live_before,
+                           RRegSet regs_live_after )
+{
+   Addr helper;
+   UInt argv[] = { a_reg,   tv_val };
+   Tag  tagv[] = { RealReg, tv_tag };
+
+   vg_assert(tv_tag == RealReg || tv_tag == Literal);
+   switch (sz) {
+      case 4: helper = (Addr) SK_(helperc_STOREV4); break;
+      case 2: helper = (Addr) SK_(helperc_STOREV2); break;
+      case 1: helper = (Addr) SK_(helperc_STOREV1); break;
+      default: VG_(panic)("synth_STOREV");
+   }
+   VG_(synth_ccall) ( helper, 2, 2, argv, tagv, INVALID_REALREG,
+                      regs_live_before, regs_live_after );
+}
+
+
+static void synth_SETV ( Int sz, Int reg )
+{
+   UInt val;
+   switch (sz) {
+      case 4: val = 0x00000000; break;
+      case 2: val = 0xFFFF0000; break;
+      case 1: val = 0xFFFFFF00; break;
+      case 0: val = 0xFFFFFFFE; break;
+      default: VG_(panic)("synth_SETV");
+   }
+   VG_(emit_movv_lit_reg) ( 4, val, reg );
+}
+
+
+static void synth_TESTV ( Int sz, Int tag, Int val )
+{
+   vg_assert(tag == ArchReg || tag == RealReg);
+   if (tag == ArchReg) {
+      switch (sz) {
+         case 4: 
+            emit_testv_lit_offregmem ( 
+               4, 0xFFFFFFFF, VG_(shadowRegOffset)(val), R_EBP );
+            break;
+         case 2: 
+            emit_testv_lit_offregmem ( 
+               4, 0x0000FFFF, VG_(shadowRegOffset)(val), R_EBP );
+            break;
+         case 1:
+            if (val < 4) {
+               emit_testv_lit_offregmem ( 
+                  4, 0x000000FF, VG_(shadowRegOffset)(val), R_EBP );
+            } else {
+               emit_testv_lit_offregmem ( 
+                  4, 0x0000FF00, VG_(shadowRegOffset)(val-4), R_EBP );
+            }
+            break;
+         case 0: 
+            /* should never happen */
+         default: 
+            VG_(panic)("synth_TESTV(ArchReg)");
+      }
+   } else {
+      switch (sz) {
+         case 4:
+            /* Works, but holds the entire 32-bit literal, hence
+               generating a 6-byte insn.  We want to know if any bits
+               in the reg are set, but since this is for the full reg,
+               we might as well compare it against zero, which can be
+               done with a shorter insn. */
+            /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */
+            VG_(emit_cmpl_zero_reg) ( val );
+            break;
+         case 2:
+            synth_minimal_test_lit_reg ( 0x0000FFFF, val );
+            break;
+         case 1:
+            synth_minimal_test_lit_reg ( 0x000000FF, val );
+            break;
+         case 0:
+            synth_minimal_test_lit_reg ( 0x00000001, val );
+            break;
+         default: 
+            VG_(panic)("synth_TESTV(RealReg)");
+      }
+   }
+   VG_(emit_jcondshort_delta) ( CondZ, 3 );
+   VG_(synth_call) (
+      True, /* needed to guarantee that this insn is indeed 3 bytes long */
+      ( sz==4 
+      ? VG_(helper_offset)((Addr) & SK_(helper_value_check4_fail))
+      : ( sz==2 
+        ? VG_(helper_offset)((Addr) & SK_(helper_value_check2_fail))
+        : ( sz==1 
+          ? VG_(helper_offset)((Addr) & SK_(helper_value_check1_fail))
+          : VG_(helper_offset)((Addr) & SK_(helper_value_check0_fail)))))
+   );
+}
+
+
+static void synth_GETV ( Int sz, Int arch, Int reg )
+{
+   /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */
+   switch (sz) {
+      case 4: 
+         VG_(emit_movv_offregmem_reg) ( 4, VG_(shadowRegOffset)(arch),
+                                        R_EBP, reg );
+         break;
+      case 2: 
+         VG_(emit_movzwl_offregmem_reg) ( VG_(shadowRegOffset)(arch),
+                                          R_EBP, reg );
+         VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFF0000, reg );
+         break;
+      case 1: 
+         if (arch < 4) {
+            VG_(emit_movzbl_offregmem_reg) ( VG_(shadowRegOffset)(arch),
+                                             R_EBP, reg );
+         } else {
+            VG_(emit_movzbl_offregmem_reg) ( VG_(shadowRegOffset)(arch-4)+1,
+                                             R_EBP, reg );
+         }
+         VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFFFF00, reg );
+         break;
+      default: 
+         VG_(panic)("synth_GETV");
+   }
+}
+
+
+static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch )
+{
+   if (srcTag == Literal) {
+     /* PUTV with a Literal is only ever used to set the corresponding
+        ArchReg to `all valid'.  Should really be a kind of SETV. */
+      UInt lit = lit_or_reg;
+      switch (sz) {
+         case 4:
+            vg_assert(lit == 0x00000000);
+            VG_(emit_movv_lit_offregmem) ( 4, 0x00000000, 
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 2:
+            vg_assert(lit == 0xFFFF0000);
+            VG_(emit_movv_lit_offregmem) ( 2, 0x0000, 
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 1:
+            vg_assert(lit == 0xFFFFFF00);
+            if (arch < 4) {
+               VG_(emit_movb_lit_offregmem) ( 0x00, 
+                                         VG_(shadowRegOffset)(arch), R_EBP );
+            } else {
+               VG_(emit_movb_lit_offregmem) ( 0x00, 
+                                              VG_(shadowRegOffset)(arch-4)+1,
+                                              R_EBP );
+            }
+            break;
+         default: 
+            VG_(panic)("synth_PUTV(lit)");
+      }
+
+   } else {
+
+      UInt reg;
+      vg_assert(srcTag == RealReg);
+
+      if (sz == 1 && lit_or_reg >= 4) {
+         VG_(emit_swapl_reg_EAX) ( lit_or_reg );
+         reg = R_EAX;
+      } else {
+         reg = lit_or_reg;
+      }
+
+      if (sz == 1) vg_assert(reg < 4);
+
+      switch (sz) {
+         case 4:
+            VG_(emit_movv_reg_offregmem) ( 4, reg,
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 2:
+            VG_(emit_movv_reg_offregmem) ( 2, reg,
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 1:
+            if (arch < 4) {
+               VG_(emit_movb_reg_offregmem) ( reg,
+                                         VG_(shadowRegOffset)(arch), R_EBP );
+	    } else {
+               VG_(emit_movb_reg_offregmem) ( reg,
+                                        VG_(shadowRegOffset)(arch-4)+1, R_EBP );
+            }
+            break;
+         default: 
+            VG_(panic)("synth_PUTV(reg)");
+      }
+
+      if (sz == 1 && lit_or_reg >= 4) {
+         VG_(emit_swapl_reg_EAX) ( lit_or_reg );
+      }
+   }
+}
+
+
+static void synth_GETVF ( Int reg )
+{
+   VG_(emit_movv_offregmem_reg) ( 4, VG_(shadowFlagsOffset)(), R_EBP, reg );
+   /* paranoia only; should be unnecessary ... */
+   /* VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFFFFFE, reg ); */
+}
+
+
+static void synth_PUTVF ( UInt reg )
+{
+   VG_(emit_movv_reg_offregmem) ( 4, reg, VG_(shadowFlagsOffset)(), R_EBP );
+}
+
+
+static void synth_TAG1_op ( TagOp op, Int reg, RRegSet regs_live_after )
+{
+   switch (op) {
+
+      /* Scheme is
+            neg<sz> %reg          -- CF = %reg==0 ? 0 : 1
+            sbbl %reg, %reg       -- %reg = -CF
+            or 0xFFFFFFFE, %reg   -- invalidate all bits except lowest
+      */
+      case Tag_PCast40:
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+      case Tag_PCast20:
+         VG_(emit_unaryopv_reg)(2, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+      case Tag_PCast10:
+         if (reg >= 4) {
+            VG_(emit_swapl_reg_EAX)(reg);
+            VG_(emit_unaryopb_reg)(NEG, R_EAX);
+            VG_(emit_swapl_reg_EAX)(reg);
+         } else {
+            VG_(emit_unaryopb_reg)(NEG, reg);
+         }
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+
+      /* Scheme is
+            andl $1, %reg -- %reg is 0 or 1
+            negl %reg -- %reg is 0 or 0xFFFFFFFF
+            and possibly an OR to invalidate unused bits.
+      */
+      case Tag_PCast04:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         break;
+      case Tag_PCast02:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_PCast01:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, reg);
+         break;
+
+      /* Scheme is
+            shl $24, %reg -- make irrelevant bits disappear
+            negl %reg             -- CF = %reg==0 ? 0 : 1
+            sbbl %reg, %reg       -- %reg = -CF
+            and possibly an OR to invalidate unused bits.
+      */
+      case Tag_PCast14:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         break;
+      case Tag_PCast12:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_PCast11:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, reg);
+         break;
+
+      /* We use any non-live reg (except %reg) as a temporary,
+         or push/pop %ebp if none available:
+            (%dead_reg = any dead reg, else %ebp)
+            (pushl %ebp if all regs live)
+            movl %reg, %dead_reg
+            negl %dead_reg
+            orl %dead_reg, %reg
+            (popl %ebp if all regs live)
+         This sequence turns out to be correct regardless of the 
+         operation width.
+      */
+      case Tag_Left4:
+      case Tag_Left2:
+      case Tag_Left1: {
+         UInt dead_reg = R_EBP;
+         Int  i, reg_of_i;
+
+         for (i = 0; i < VG_MAX_REALREGS; i++) {
+            if (! IS_RREG_LIVE(i, regs_live_after)) {
+               reg_of_i = VG_(rankToRealRegNum)(i);
+               if (reg != reg_of_i) {
+                  dead_reg = reg_of_i;
+                  break;
+               }
+            }
+         }
+
+         if (R_EBP == dead_reg)
+            VG_(emit_pushv_reg)(4, dead_reg);
+         VG_(emit_movv_reg_reg)(4, reg, dead_reg);
+         VG_(emit_unaryopv_reg)(4, NEG, dead_reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, dead_reg, reg);
+         if (R_EBP == dead_reg)
+            VG_(emit_popv_reg)(4, dead_reg);
+         break;
+      }
+
+      /* These are all fairly obvious; do the op and then, if
+         necessary, invalidate unused bits. */
+      case Tag_SWiden14:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 24, reg);
+         break;
+      case Tag_SWiden24:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 16, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 16, reg);
+         break;
+      case Tag_SWiden12:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 24, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_ZWiden14:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x000000FF, reg);
+         break;
+      case Tag_ZWiden24:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x0000FFFF, reg);
+         break;
+      case Tag_ZWiden12:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x000000FF, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+
+      default:
+         VG_(panic)("synth_TAG1_op");
+   }
+}
+
+
+static void synth_TAG2_op ( TagOp op, Int regs, Int regd )
+{
+   switch (op) {
+
+      /* UifU is implemented by OR, since 1 means Undefined. */
+      case Tag_UifU4:
+      case Tag_UifU2:
+      case Tag_UifU1:
+      case Tag_UifU0:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         break;
+
+      /* DifD is implemented by AND, since 0 means Defined. */
+      case Tag_DifD4:
+      case Tag_DifD2:
+      case Tag_DifD1:
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         break;
+
+      /* ImproveAND(value, tags) = value OR tags.
+	 Defined (0) value 0s give defined (0); all other -> undefined (1).
+         value is in regs; tags is in regd. 
+         Be paranoid and invalidate unused bits; I don't know whether 
+         or not this is actually necessary. */
+      case Tag_ImproveAND4_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         break;
+      case Tag_ImproveAND2_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, regd);
+         break;
+      case Tag_ImproveAND1_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, regd);
+         break;
+
+      /* ImproveOR(value, tags) = (not value) OR tags.
+	 Defined (0) value 1s give defined (0); all other -> undefined (1).
+         value is in regs; tags is in regd. 
+         To avoid trashing value, this is implemented (re de Morgan) as
+               not (value AND (not tags))
+         Be paranoid and invalidate unused bits; I don't know whether 
+         or not this is actually necessary. */
+      case Tag_ImproveOR4_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         break;
+      case Tag_ImproveOR2_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, regd);
+         break;
+      case Tag_ImproveOR1_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, regd);
+         break;
+
+      default:
+         VG_(panic)("synth_TAG2_op");
+   }
+}
+
+/*----------------------------------------------------*/
+/*--- Generate code for a single UInstr.           ---*/
+/*----------------------------------------------------*/
+
+void SK_(emitExtUInstr) ( UInstr* u, RRegSet regs_live_before )
+{
+   switch (u->opcode) {
+
+      case SETV:
+         vg_assert(u->tag1 == RealReg);
+         synth_SETV ( u->size, u->val1 );
+         break;
+
+      case STOREV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
+         vg_assert(u->tag2 == RealReg);
+         synth_STOREV ( u->size, u->tag1, 
+                                 u->tag1==Literal ? u->lit32 : u->val1, 
+                                 u->val2,
+                        regs_live_before, u->regs_live_after );
+         break;
+
+      case LOADV:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->tag2 == RealReg);
+         if (0)
+            VG_(emit_AMD_prefetch_reg) ( u->val1 );
+         synth_LOADV ( u->size, u->val1, u->val2,
+                       regs_live_before, u->regs_live_after );
+         break;
+
+      case TESTV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg);
+         synth_TESTV(u->size, u->tag1, u->val1);
+         break;
+
+      case GETV:
+         vg_assert(u->tag1 == ArchReg);
+         vg_assert(u->tag2 == RealReg);
+         synth_GETV(u->size, u->val1, u->val2);
+         break;
+
+      case GETVF:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->size == 0);
+         synth_GETVF(u->val1);
+         break;
+
+      case PUTV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
+         vg_assert(u->tag2 == ArchReg);
+         synth_PUTV(u->size, u->tag1, 
+                             u->tag1==Literal ? u->lit32 : u->val1, 
+                             u->val2 );
+         break;
+
+      case PUTVF:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->size == 0);
+         synth_PUTVF(u->val1);
+         break;
+
+      case TAG1:
+         synth_TAG1_op ( u->val3, u->val1, u->regs_live_after );
+         break;
+
+      case TAG2:
+         synth_TAG2_op ( u->val3, u->val1, u->val2 );
+         break;
+
+      default: 
+         VG_(printf)("emitExtUInstr: unhandled extension insn:\n");
+         VG_(ppUInstr)(0,u);
+         VG_(panic)("emitExtUInstr: unhandled extension opcode");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_from_ucode.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_helpers.S b/memcheck/mc_helpers.S
new file mode 100644
index 0000000..515c873
--- /dev/null
+++ b/memcheck/mc_helpers.S
@@ -0,0 +1,62 @@
+##--------------------------------------------------------------------##
+##--- Support routines for the memory checker.                     ---##
+##---                                        vg_memcheck_helpers.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an x86 protected-mode emulator 
+  designed for debugging and profiling binaries on x86-Unixes.
+
+  Copyright (C) 2000-2002 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_constants.h"
+
+.global SK_(helper_value_check0_fail)
+SK_(helper_value_check0_fail):
+	pushal
+	call	SK_(helperc_value_check0_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check1_fail)
+SK_(helper_value_check1_fail):
+	pushal
+	call	SK_(helperc_value_check1_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check2_fail)
+SK_(helper_value_check2_fail):
+	pushal
+	call	SK_(helperc_value_check2_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check4_fail)
+SK_(helper_value_check4_fail):
+	pushal
+	call	SK_(helperc_value_check4_fail)
+	popal
+	ret
+
+
+
diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h
new file mode 100644
index 0000000..82bcae7
--- /dev/null
+++ b/memcheck/mc_include.h
@@ -0,0 +1,209 @@
+/*--------------------------------------------------------------------*/
+/*--- A header file for all parts of the MemCheck skin.            ---*/
+/*---                                        vg_memcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_MEMCHECK_INCLUDE_H
+#define __VG_MEMCHECK_INCLUDE_H
+
+#include "vg_skin.h"
+
+/* UCode extension for efficient memory checking operations */
+typedef
+   enum {
+      /* uinstrs which are not needed for mere translation of x86 code,
+         only for instrumentation of it. */
+      LOADV = DUMMY_FINAL_UOPCODE + 1,
+      STOREV,
+      GETV,
+      PUTV,
+      TESTV,
+      SETV, 
+      /* Get/set the v-bit (and it is only one bit) for the simulated
+         %eflags register. */
+      GETVF,
+      PUTVF,
+
+      /* Do a unary or binary tag op.  Only for post-instrumented
+         code.  For TAG1, first and only arg is a TempReg, and is both
+         arg and result reg.  For TAG2, first arg is src, second is
+         dst, in the normal way; both are TempRegs.  In both cases,
+         3rd arg is a RiCHelper with a Lit16 tag.  This indicates
+         which tag op to do. */
+      TAG1,
+      TAG2
+   }
+   MemCheckOpcode;
+
+
+/* Lists the names of value-tag operations used in instrumented
+   code.  These are the third argument to TAG1 and TAG2 uinsns. */
+typedef
+   enum { 
+     /* Unary. */
+     Tag_PCast40, Tag_PCast20, Tag_PCast10,
+     Tag_PCast01, Tag_PCast02, Tag_PCast04,
+
+     Tag_PCast14, Tag_PCast12, Tag_PCast11,
+
+     Tag_Left4, Tag_Left2, Tag_Left1,
+
+     Tag_SWiden14, Tag_SWiden24, Tag_SWiden12,
+     Tag_ZWiden14, Tag_ZWiden24, Tag_ZWiden12,
+
+     /* Binary; 1st is rd; 2nd is rd+wr */
+     Tag_UifU4, Tag_UifU2, Tag_UifU1, Tag_UifU0,
+     Tag_DifD4, Tag_DifD2, Tag_DifD1,
+
+     Tag_ImproveAND4_TQ, Tag_ImproveAND2_TQ, Tag_ImproveAND1_TQ,
+     Tag_ImproveOR4_TQ, Tag_ImproveOR2_TQ, Tag_ImproveOR1_TQ,
+     Tag_DebugFn
+   }
+   TagOp;
+
+/* The classification of a faulting address. */
+typedef 
+   enum { Undescribed, /* as-yet unclassified */
+          Stack, 
+          Unknown, /* classification yielded nothing useful */
+          Freed, Mallocd, 
+          UserG, UserS 
+   }
+   AddrKind;
+
+/* Records info about a faulting address. */
+typedef
+   struct {
+      /* ALL */
+      AddrKind akind;
+      /* Freed, Mallocd */
+      Int blksize;
+      /* Freed, Mallocd */
+      Int rwoffset;
+      /* Freed, Mallocd */
+      ExeContext* lastchange;
+      /* Stack */
+      ThreadId stack_tid;
+      /* True if is just-below %esp -- could be a gcc bug. */
+      Bool maybe_gcc;
+   }
+   AddrInfo;
+
+
+/*------------------------------------------------------------*/
+/*--- Skin-specific command line options + defaults        ---*/
+/*------------------------------------------------------------*/
+
+/* Allow loads from partially-valid addresses?  default: YES */
+extern Bool SK_(clo_partial_loads_ok);
+
+/* Max volume of the freed blocks queue. */
+extern Int SK_(clo_freelist_vol);
+
+/* Do leak check at exit?  default: NO */
+extern Bool SK_(clo_leak_check);
+
+/* How closely should we compare ExeContexts in leak records? default: 2 */
+extern VgRes SK_(clo_leak_resolution);
+
+/* In leak check, show reachable-but-not-freed blocks?  default: NO */
+extern Bool SK_(clo_show_reachable);
+
+/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
+ * default: NO*/
+extern Bool SK_(clo_workaround_gcc296_bugs);
+
+/* Shall we V-check addrs? (they are always A checked too)   default: YES */
+extern Bool SK_(clo_check_addrVs);
+
+/* DEBUG: clean up instrumented code?  default: YES */
+extern Bool SK_(clo_cleanup);
+
+
+/*------------------------------------------------------------*/
+/*--- Functions                                            ---*/
+/*------------------------------------------------------------*/
+
+// SSS: work out a consistent prefix convention here
+
+/* Functions defined in vg_memcheck_helpers.S */
+extern void SK_(helper_value_check4_fail) ( void );
+extern void SK_(helper_value_check2_fail) ( void );
+extern void SK_(helper_value_check1_fail) ( void );
+extern void SK_(helper_value_check0_fail) ( void );
+
+/* Functions defined in vg_memcheck.c */
+extern void SK_(helperc_STOREV4) ( UInt, Addr );
+extern void SK_(helperc_STOREV2) ( UInt, Addr );
+extern void SK_(helperc_STOREV1) ( UInt, Addr );
+   
+extern UInt SK_(helperc_LOADV1) ( Addr );
+extern UInt SK_(helperc_LOADV2) ( Addr );
+extern UInt SK_(helperc_LOADV4) ( Addr );
+
+extern void SK_(fpu_write_check) ( Addr addr, Int size );
+extern void SK_(fpu_read_check)  ( Addr addr, Int size );
+
+extern ShadowChunk* SK_(any_matching_freed_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ) );
+
+/* For client requests */
+extern void SK_(make_noaccess) ( Addr a, UInt len );
+extern void SK_(make_readable) ( Addr a, UInt len );
+extern void SK_(make_writable) ( Addr a, UInt len );
+
+extern Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
+extern Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
+
+extern void SK_(detect_memory_leaks) ( void );
+
+
+/* Functions defined in vg_memcheck_clientreqs.c */
+extern Bool SK_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );
+extern void SK_(delete_client_stack_blocks_following_ESP_change) ( void );
+extern void SK_(show_client_block_stats) ( void );
+
+/* Functions defined in vg_memcheck_errcontext.c */
+extern void SK_(record_value_error)       ( Int size );
+extern void SK_(record_address_error)     ( Addr a, Int size, Bool isWrite );
+extern void SK_(record_core_mem_error)    ( ThreadState* tst, Bool isWrite,
+                                            Char* s );
+extern void SK_(record_param_error)       ( ThreadState* tst, Addr a,   
+                                            Bool isWriteLack, Char* msg );
+extern void SK_(record_jump_error)        ( ThreadState* tst, Addr a );
+extern void SK_(record_free_error)        ( ThreadState* tst, Addr a );
+extern void SK_(record_freemismatch_error)( ThreadState* tst, Addr a );
+extern void SK_(record_user_error)        ( ThreadState* tst, Addr a, 
+                                            Bool isWrite );
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                    vg_memcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
new file mode 100644
index 0000000..4ee380f
--- /dev/null
+++ b/memcheck/mc_main.c
@@ -0,0 +1,2428 @@
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: Maintain bitmaps of memory,       ---*/
+/*--- tracking the accessibility (A) and validity (V) status of    ---*/
+/*--- each byte.                                                   ---*/
+/*---                                                vg_memcheck.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+#include "vg_memcheck.h"   /* for client requests */
+//#include "vg_profile.c"
+
+/* Define to debug the mem audit system. */
+/* #define VG_DEBUG_MEMORY */
+
+/* Define to debug the memory-leak-detector. */
+/* #define VG_DEBUG_LEAKCHECK */
+
+/* Define to collect detailed performance info. */
+/* #define VG_PROFILE_MEMORY */
+
+#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
+
+/*------------------------------------------------------------*/
+/*--- Command line options                                 ---*/
+/*------------------------------------------------------------*/
+
+Bool  SK_(clo_partial_loads_ok)       = True;
+Int   SK_(clo_freelist_vol)           = 1000000;
+Bool  SK_(clo_leak_check)             = False;
+VgRes SK_(clo_leak_resolution)        = Vg_LowRes;
+Bool  SK_(clo_show_reachable)         = False;
+Bool  SK_(clo_workaround_gcc296_bugs) = False;
+Bool  SK_(clo_check_addrVs)           = True;
+Bool  SK_(clo_cleanup)                = True;
+
+/*------------------------------------------------------------*/
+/*--- Profiling events                                     ---*/
+/*------------------------------------------------------------*/
+
+typedef 
+   enum { 
+      VgpCheckMem = VgpFini+1,
+      VgpSetMem
+   } 
+   VgpSkinCC;
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory checking.               ---*/
+/*------------------------------------------------------------*/
+
+/* All reads and writes are checked against a memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like this:
+
+   The top 16 bits of an address are used to index into a top-level
+   map table, containing 65536 entries.  Each entry is a pointer to a
+   second-level map, which records the accessibility and validity
+   permissions for the 65536 bytes indexed by the lower 16 bits of the
+   address.  Each byte is represented by nine bits, one indicating
+   accessibility, the other eight validity.  So each second-level map
+   contains 73728 bytes.  This two-level arrangement conveniently
+   divides the 4G address space into 64k lumps, each size 64k bytes.
+
+   All entries in the primary (top-level) map must point to a valid
+   secondary (second-level) map.  Since most of the 4G of address
+   space will not be in use -- ie, not mapped at all -- there is a
+   distinguished secondary map, which indicates `not addressable and
+   not valid' writeable for all bytes.  Entries in the primary map for
+   which the entire 64k is not in use at all point at this
+   distinguished map.
+
+   [...] lots of stuff deleted due to out of date-ness
+
+   As a final optimisation, the alignment and address checks for
+   4-byte loads and stores are combined in a neat way.  The primary
+   map is extended to have 262144 entries (2^18), rather than 2^16.
+   The top 3/4 of these entries are permanently set to the
+   distinguished secondary map.  For a 4-byte load/store, the
+   top-level map is indexed not with (addr >> 16) but instead f(addr),
+   where
+
+    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
+        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
+        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
+
+   ie the lowest two bits are placed above the 16 high address bits.
+   If either of these two bits are nonzero, the address is misaligned;
+   this will select a secondary map from the upper 3/4 of the primary
+   map.  Because this is always the distinguished secondary map, a
+   (bogus) address check failure will result.  The failure handling
+   code can then figure out whether this is a genuine addr check
+   failure or whether it is a possibly-legitimate access at a
+   misaligned address.  
+*/
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+static UInt event_ctr[N_PROF_EVENTS];
+
+static void init_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++)
+      event_ctr[i] = 0;
+}
+
+static void done_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0) 
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False);
+
+#else
+
+static void init_prof_mem ( void ) { }
+static void done_prof_mem ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   10   alloc_secondary_map
+
+   20   get_abit
+   21   get_vbyte
+   22   set_abit
+   23   set_vbyte
+   24   get_abits4_ALIGNED
+   25   get_vbytes4_ALIGNED
+
+   30   set_address_range_perms
+   31   set_address_range_perms(lower byte loop)
+   32   set_address_range_perms(quadword loop)
+   33   set_address_range_perms(upper byte loop)
+   
+   35   make_noaccess
+   36   make_writable
+   37   make_readable
+
+   40   copy_address_range_state
+   41   copy_address_range_state(byte loop)
+   42   check_writable
+   43   check_writable(byte loop)
+   44   check_readable
+   45   check_readable(byte loop)
+   46   check_readable_asciiz
+   47   check_readable_asciiz(byte loop)
+
+   50   make_aligned_word_NOACCESS
+   51   make_aligned_word_WRITABLE
+
+   60   helperc_LOADV4
+   61   helperc_STOREV4
+   62   helperc_LOADV2
+   63   helperc_STOREV2
+   64   helperc_LOADV1
+   65   helperc_STOREV1
+
+   70   rim_rd_V4_SLOWLY
+   71   rim_wr_V4_SLOWLY
+   72   rim_rd_V2_SLOWLY
+   73   rim_wr_V2_SLOWLY
+   74   rim_rd_V1_SLOWLY
+   75   rim_wr_V1_SLOWLY
+
+   80   fpu_read
+   81   fpu_read aligned 4
+   82   fpu_read aligned 8
+   83   fpu_read 2
+   84   fpu_read 10
+
+   85   fpu_write
+   86   fpu_write aligned 4
+   87   fpu_write aligned 8
+   88   fpu_write 2
+   89   fpu_write 10
+
+   90   fpu_read_check_SLOWLY
+   91   fpu_read_check_SLOWLY(byte loop)
+   92   fpu_write_check_SLOWLY
+   93   fpu_write_check_SLOWLY(byte loop)
+
+   100  is_plausible_stack_addr
+   101  handle_esp_assignment
+   102  handle_esp_assignment(-4)
+   103  handle_esp_assignment(+4)
+   104  handle_esp_assignment(-12)
+   105  handle_esp_assignment(-8)
+   106  handle_esp_assignment(+16)
+   107  handle_esp_assignment(+12)
+   108  handle_esp_assignment(0)
+   109  handle_esp_assignment(+8)
+   110  handle_esp_assignment(-16)
+   111  handle_esp_assignment(+20)
+   112  handle_esp_assignment(-20)
+   113  handle_esp_assignment(+24)
+   114  handle_esp_assignment(-24)
+
+   120  vg_handle_esp_assignment_SLOWLY
+   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
+   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
+   123  vg_handle_esp_assignment_SLOWLY(normal)
+   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
+*/
+
+/*------------------------------------------------------------*/
+/*--- Function declarations.                               ---*/
+/*------------------------------------------------------------*/
+
+static UInt vgmext_rd_V4_SLOWLY ( Addr a );
+static UInt vgmext_rd_V2_SLOWLY ( Addr a );
+static UInt vgmext_rd_V1_SLOWLY ( Addr a );
+static void vgmext_wr_V4_SLOWLY ( Addr a, UInt vbytes );
+static void vgmext_wr_V2_SLOWLY ( Addr a, UInt vbytes );
+static void vgmext_wr_V1_SLOWLY ( Addr a, UInt vbytes );
+static void fpu_read_check_SLOWLY ( Addr addr, Int size );
+static void fpu_write_check_SLOWLY ( Addr addr, Int size );
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+typedef 
+   struct {
+      UChar abits[8192];
+      UChar vbyte[65536];
+   }
+   SecMap;
+
+static SecMap* primary_map[ /*65536*/ 262144 ];
+static SecMap  distinguished_secondary_map;
+
+/* True iff smap is the shared all-invalid secondary map. */
+#define IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+/* Before writing shadow state for addr, replace the distinguished
+   map for its 64KB chunk with a freshly allocated private one. */
+#define ENSURE_MAPPABLE(addr,caller)                                   \
+   do {                                                                \
+      if (IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller); \
+         /* VG_(printf)("new 2map because of %p\n", addr); */          \
+      }                                                                \
+   } while(0)
+
+/* Set / clear / test bit iii_p of byte array aaa_p, LSB-first
+   within each byte. */
+#define BITARR_SET(aaa_p,iii_p)                         \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] |= (1 << (iii & 7));                \
+   } while (0)
+
+#define BITARR_CLEAR(aaa_p,iii_p)                       \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
+   } while (0)
+
+#define BITARR_TEST(aaa_p,iii_p)                        \
+      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
+               & (1 << (((UInt)iii_p) & 7))))           \
+
+
+/* A bits: 0 = addressible, 1 = not addressible. */
+#define VGM_BIT_VALID      0
+#define VGM_BIT_INVALID    1
+
+/* Aggregates of A bits, used on the fast word-at-a-time paths. */
+#define VGM_NIBBLE_VALID   0
+#define VGM_NIBBLE_INVALID 0xF
+
+/* V bytes: 0 = all 8 bits defined, 0xFF = all 8 bits undefined. */
+#define VGM_BYTE_VALID     0
+#define VGM_BYTE_INVALID   0xFF
+
+#define VGM_WORD_VALID     0
+#define VGM_WORD_INVALID   0xFFFFFFFF
+
+#define VGM_EFLAGS_VALID   0xFFFFFFFE
+#define VGM_EFLAGS_INVALID 0xFFFFFFFF     /* not used */
+
+
+/* Initialise the shadow-memory state: mark the distinguished
+   secondary map as wholly invalid, and point every primary-map slot
+   at it. */
+static void init_shadow_memory ( void )
+{
+   Int n;
+
+   /* Everything starts out unaddressible ... */
+   for (n = 0; n < 8192; n++)
+      distinguished_secondary_map.abits[n] = VGM_BYTE_INVALID;
+   /* ... and with undefined contents. */
+   for (n = 0; n < 65536; n++)
+      distinguished_secondary_map.vbyte[n] = VGM_BYTE_INVALID;
+
+   /* All primary entries share the distinguished map.  The first
+      65536 entries gradually get overwritten as the used address
+      space expands; the remaining ones (65536..262143) should never
+      change -- it's a bug in Valgrind if they do. */
+   for (n = 0; n < 262144; n++)
+      primary_map[n] = &distinguished_secondary_map;
+}
+
+/* Called after command-line option processing; nothing to do here. */
+void SK_(post_clo_init) ( void )
+{
+}
+
+/* Skin finalisation: print allocator stats, run the leak checker if
+   requested, and emit end-of-run hints at default verbosity. */
+void SK_(fini) ( void )
+{
+   VG_(print_malloc_stats)();
+
+   if (VG_(clo_verbosity) == 1) {
+      if (!SK_(clo_leak_check))
+         VG_(message)(Vg_UserMsg, 
+             "For a detailed leak analysis,  rerun with: --leak-check=yes");
+
+      VG_(message)(Vg_UserMsg, 
+                   "For counts of detected errors, rerun with: -v");
+   }
+   if (SK_(clo_leak_check)) SK_(detect_memory_leaks)();
+
+   done_prof_mem();
+
+   /* Dead branch by design: flip 0 to 1 to dump client block stats. */
+   if (0) {
+      VG_(message)(Vg_DebugMsg, 
+        "------ Valgrind's client block stats follow ---------------" );
+      SK_(show_client_block_stats)();
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map. */
+
+/* Allocate a fresh secondary map via mmap and mark all of it
+   unaddressible/undefined.  `caller' is for debug tracing only. */
+static SecMap* alloc_secondary_map ( __attribute__ ((unused)) 
+                                     Char* caller )
+{
+   SecMap* map;
+   UInt  i;
+   PROF_EVENT(10);
+
+   /* Mark all bytes as invalid access and invalid value. */
+
+   /* It just happens that a SecMap occupies exactly 18 pages --
+      although this isn't important, so the following assert is
+      spurious. */
+   vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
+
+   for (i = 0; i < 8192; i++)
+      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
+   for (i = 0; i < 65536; i++)
+      map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */
+
+   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
+   return map;
+}
+
+
+/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
+
+/* Return the A bit for byte a: VGM_BIT_VALID if addressible.  The
+   stored bit is 1-means-invalid, hence the inversion on return. */
+static __inline__ UChar get_abit ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(20);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (A)map! 0x%x\n", a);
+#  endif
+   return BITARR_TEST(sm->abits, sm_off) 
+             ? VGM_BIT_INVALID : VGM_BIT_VALID;
+}
+
+/* Return the V (validity) byte shadowing byte a; 0 = fully defined. */
+static __inline__ UChar get_vbyte ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(21);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (V)map! 0x%x\n", a);
+#  endif
+   return sm->vbyte[sm_off];
+}
+
+/* Write the A bit for byte a (nonzero abit = unaddressible),
+   allocating a private secondary map for the chunk if needed. */
+static __inline__ void set_abit ( Addr a, UChar abit )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(22);
+   ENSURE_MAPPABLE(a, "set_abit");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   if (abit) 
+      BITARR_SET(sm->abits, sm_off);
+   else
+      BITARR_CLEAR(sm->abits, sm_off);
+}
+
+/* Write the V byte for byte a, allocating a private secondary map
+   for the chunk if needed. */
+static __inline__ void set_vbyte ( Addr a, UChar vbyte )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(23);
+   ENSURE_MAPPABLE(a, "set_vbyte");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   sm->vbyte[sm_off] = vbyte;
+}
+
+
+/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
+
+/* Return the 4 A bits for the 4-aligned word at a, in the low nibble
+   of the result (VGM_NIBBLE_VALID when all addressible). */
+static __inline__ UChar get_abits4_ALIGNED ( Addr a )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   abits8;
+   PROF_EVENT(24);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   abits8 = sm->abits[sm_off >> 3];
+   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+   abits8 &= 0x0F;
+   return abits8;
+}
+
+/* Return the 4 V bytes for the 4-aligned word at a, packed into one
+   UInt by a direct (little-endian) word read of the V map. */
+static UInt __inline__ get_vbytes4_ALIGNED ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(25);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   return ((UInt*)(sm->vbyte))[sm_off >> 2];
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setting permissions over address ranges.             ---*/
+/*------------------------------------------------------------*/
+
+/* Set the A bit of every byte in [a, a+len) to example_a_bit and its
+   V bits to example_v_bit.  Byte-at-a-time up to 8-alignment, then 8
+   bytes (one A byte, two V words) per iteration, then byte-at-a-time
+   for the tail. */
+static void set_address_range_perms ( Addr a, UInt len, 
+                                      UInt example_a_bit,
+                                      UInt example_v_bit )
+{
+   UChar   vbyte, abyte8;
+   UInt    vword4, sm_off;
+   SecMap* sm;
+
+   PROF_EVENT(30);
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000) {
+      VG_(message)(Vg_UserMsg, 
+                   "Warning: set address range perms: "
+                   "large range %u, a %d, v %d",
+                   len, example_a_bit, example_v_bit );
+   }
+
+   VGP_PUSHCC(VgpSetMem);
+
+   /* Requests to change permissions of huge address ranges may
+      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
+      far all legitimate requests have fallen beneath that size. */
+   /* 4 Mar 02: this is just stupid; get rid of it. */
+   /* vg_assert(len < 30000000); */
+
+   /* Check the permissions make sense. */
+   vg_assert(example_a_bit == VGM_BIT_VALID 
+             || example_a_bit == VGM_BIT_INVALID);
+   vg_assert(example_v_bit == VGM_BIT_VALID 
+             || example_v_bit == VGM_BIT_INVALID);
+   /* Unaddressible memory may not hold valid data. */
+   if (example_a_bit == VGM_BIT_INVALID)
+      vg_assert(example_v_bit == VGM_BIT_INVALID);
+
+   /* The validity bits to write. */
+   vbyte = example_v_bit==VGM_BIT_VALID 
+              ? VGM_BYTE_VALID : VGM_BYTE_INVALID;
+
+   /* In order that we can charge through the address space at 8
+      bytes/main-loop iteration, make up some perms. */
+   abyte8 = (example_a_bit << 7)
+            | (example_a_bit << 6)
+            | (example_a_bit << 5)
+            | (example_a_bit << 4)
+            | (example_a_bit << 3)
+            | (example_a_bit << 2)
+            | (example_a_bit << 1)
+            | (example_a_bit << 0);
+   vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte;
+
+#  ifdef VG_DEBUG_MEMORY
+   /* Do it ... */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }
+
+#  else
+   /* Slowly do parts preceding 8-byte alignment. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      if ((a % 8) == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }   
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0);
+
+   /* Once aligned, go fast: 8 A bits and 8 V bytes per iteration. */
+   while (True) {
+      PROF_EVENT(32);
+      if (len < 8) break;
+      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
+      sm = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      sm->abits[sm_off >> 3] = abyte8;
+      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4;
+      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4;
+      a += 8;
+      len -= 8;
+   }
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0 && len < 8);
+
+   /* Finish the upper fragment. */
+   while (True) {
+      PROF_EVENT(33);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }   
+#  endif
+
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Set permissions for address ranges ... */
+
+/* Mark [a, a+len) as unaddressible (and hence undefined). */
+void SK_(make_noaccess) ( Addr a, UInt len )
+{
+   PROF_EVENT(35);
+   DEBUG("SK_(make_noaccess)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) as addressible but with undefined contents. */
+void SK_(make_writable) ( Addr a, UInt len )
+{
+   PROF_EVENT(36);
+   DEBUG("SK_(make_writable)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) as addressible with fully defined contents. */
+void SK_(make_readable) ( Addr a, UInt len )
+{
+   PROF_EVENT(37);
+   DEBUG("SK_(make_readable)(%p, 0x%x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
+}
+
+/* Block-copy permissions (needed for implementing realloc()). */
+
+/* Copy A/V shadow state byte-by-byte from [src, src+len) to
+   [dst, dst+len); needed for implementing realloc().
+   NOTE(review): copies low-to-high, so behaviour on overlapping
+   ranges depends on the direction of overlap. */
+static void copy_address_range_state ( Addr src, Addr dst, UInt len )
+{
+   UInt i;
+
+   DEBUG("copy_address_range_state\n");
+
+   PROF_EVENT(40);
+   for (i = 0; i < len; i++) {
+      UChar abit  = get_abit ( src+i );
+      UChar vbyte = get_vbyte ( src+i );
+      PROF_EVENT(41);
+      set_abit ( dst+i, abit );
+      set_vbyte ( dst+i, vbyte );
+   }
+}
+
+
+/* Check permissions for address range.  If inadequate permissions
+   exist, *bad_addr is set to the offending address, so the caller can
+   know what it is. */
+
+/* Return True iff every byte of [a, a+len) is addressible.  On
+   failure, *bad_addr (if non-NULL) receives the first offending
+   address. */
+Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt rem;
+   PROF_EVENT(42);
+   for (rem = len; rem > 0; rem--, a++) {
+      PROF_EVENT(43);
+      if (get_abit(a) != VGM_BIT_INVALID)
+         continue;
+      /* Unaddressible byte found: report where and give up. */
+      if (bad_addr != NULL) *bad_addr = a;
+      return False;
+   }
+   return True;
+}
+
+/* Return True iff every byte of [a, a+len) is both addressible and
+   fully defined.  On failure, *bad_addr (if non-NULL) receives the
+   first offending address. */
+Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+   UChar vbyte;
+
+   PROF_EVENT(44);
+   DEBUG("SK_(check_readable)\n");
+   for (i = 0; i < len; i++) {
+      abit  = get_abit(a);
+      vbyte = get_vbyte(a);
+      PROF_EVENT(45);
+      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so. */
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so.  Each byte's shadow state is checked before the
+   byte itself is dereferenced to look for the terminator. */
+Bool SK_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+{
+   UChar abit;
+   UChar vbyte;
+   PROF_EVENT(46);
+   DEBUG("SK_(check_readable_asciiz)\n");
+   while (True) {
+      PROF_EVENT(47);
+      abit  = get_abit(a);
+      vbyte = get_vbyte(a);
+      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      /* Ok, a is safe to read. */
+      if (* ((UChar*)a) == 0) return True;
+      a++;
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Memory event handlers                                ---*/
+/*------------------------------------------------------------*/
+
+/* Setting permissions for aligned words.  This supports fast stack
+   operations. */
+
+/* Mark the 4-aligned range [a, a+len) unaddressible/undefined, one
+   word at a time (fast path for stack operations). */
+static void make_noaccess_aligned ( Addr a, UInt len )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(50);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_noaccess_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
+      /* Select the nibble of A bits covering this word. */
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (1s). */
+      sm->abits[sm_off >> 3] |= mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Mark the 4-aligned range [a, a+len) addressible but undefined, one
+   word at a time (fast path for stack operations). */
+static void make_writable_aligned ( Addr a, UInt len )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(51);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_writable_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      /* Contents become undefined ... */
+      ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (0s). */
+      /* ... but the bytes become addressible (A bits cleared). */
+      sm->abits[sm_off >> 3] &= ~mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+
+/* Core callback: ensure [base, base+size) is writable by the client,
+   recording an error against tst if not.  `part' identifies which
+   part of the core is asking; `s' is a description used in the error
+   message. */
+static
+void check_is_writable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   /* VG_(message)(Vg_DebugMsg,"check is writable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_writable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/True, s );
+         break;
+
+      case Vg_CorePThread:
+      case Vg_CoreSignal:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/True, s );
+         break;
+
+      default:
+         /* Fixed: panic message previously said "check_is_readable",
+            which would misdirect anyone debugging a crash here. */
+         VG_(panic)("check_is_writable: Unknown or unexpected CorePart");
+      }
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: ensure [base, base+size) is readable (addressible
+   and defined) by the client, recording an error against tst if not.
+   Vg_CoreTranslate requests additionally cover jumps to bad
+   addresses. */
+static
+void check_is_readable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{     
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+   
+   /* VG_(message)(Vg_DebugMsg,"check is readable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_readable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/False, s );
+         break;
+      
+      case Vg_CorePThread:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/False, s );
+         break;
+
+      /* If we're being asked to jump to a silly address, record an error 
+         message before potentially crashing the entire system. */
+      case Vg_CoreTranslate:
+         SK_(record_jump_error)( tst, bad_addr );
+         break;
+
+      default:
+         VG_(panic)("check_is_readable: Unknown or unexpected CorePart");
+      }
+   }
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: ensure the NUL-terminated string at `str' is
+   readable; only syscall wrappers are expected to ask. */
+static
+void check_is_readable_asciiz ( CorePart part, ThreadState* tst,
+                                Char* s, UInt str )
+{
+   Bool ok = True;
+   Addr bad_addr;
+   /* VG_(message)(Vg_DebugMsg,"check is readable asciiz: 0x%x",str); */
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   vg_assert(part == Vg_CoreSysCall);
+   ok = SK_(check_readable_asciiz) ( (Addr)str, &bad_addr );
+   if (!ok) {
+      SK_(record_param_error) ( tst, bad_addr, /*is_writable =*/False, s );
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+
+/* Track new memory mapped at startup.  The rr/ww/xx permission flags
+   are deliberately ignored; everything is made readable. */
+static
+void memcheck_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   DEBUG("new_mem_startup(%p, %u, rr=%u, ww=%u, xx=%u)\n", a,len,rr,ww,xx);
+   SK_(make_readable)(a, len);
+}
+
+/* Track a new heap block: defined if the allocator initialised it
+   (eg. calloc), otherwise merely addressible. */
+static
+void memcheck_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   if (!is_inited) {
+      SK_(make_writable)(a, len);
+   } else {
+      SK_(make_readable)(a, len);
+   }
+}
+
+/* Track an mprotect-style permission change.  Readability wins over
+   writability; anything else becomes no-access.  `nn' and `xx' do
+   not affect the outcome. */
+static
+void memcheck_set_perms (Addr a, UInt len, 
+                         Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   DEBUG("memcheck_set_perms(%p, %u, nn=%u, rr=%u ww=%u, xx=%u)\n",
+                             a, len, nn, rr, ww, xx);
+   if      (rr) SK_(make_readable)(a, len);
+   else if (ww) SK_(make_writable)(a, len);
+   else         SK_(make_noaccess)(a, len);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Functions called directly from generated code.       ---*/
+/*------------------------------------------------------------*/
+
+/* Swap the two 16-bit halves of x.  Self-inverse; gcc emits a single
+   rotate instruction for this. */
+static __inline__ UInt rotateRight16 ( UInt x )
+{
+   UInt hi_half = x >> 16;
+   UInt lo_half = x << 16;
+   return hi_half | lo_half;
+}
+
+
+/* Return the top 16 bits of x, ie. its primary-map chunk number. */
+static __inline__ UInt shiftRight16 ( UInt x )
+{
+   return x >> 16;
+}
+
+
+/* Read/write 1/2/4 sized V bytes, and emit an address error if
+   needed. */
+
+/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast.
+   Under all other circumstances, it defers to the relevant _SLOWLY
+   function, which can handle all situations.
+*/
+/* Generated-code helper: load the V word for the 4 bytes at a.
+   Fast path only when a is 4-aligned and all 4 A bits are valid.
+   Note the index trick: rotateRight16 moves a's two low bits into
+   bits 16/17 of sec_no, so a misaligned a indexes the upper part of
+   primary_map, which is permanently distinguished (all-invalid) --
+   forcing the slow path without an explicit alignment test. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV4) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V4_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(60);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible. */
+      UInt v_off = a & 0xFFFF;
+      return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V4_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store the V word for the 4 bytes at a.
+   Same misalignment-trapping index trick as LOADV4. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV4) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V4_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(61);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible. */
+      UInt v_off = a & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V4_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+/* Generated-code helper: load the V bits for the 2 bytes at a, in
+   the low half of the result; the top half is marked valid.  The
+   mask 0x1FFFF keeps a's bit 0 in sec_no bit 16, so an odd a lands
+   in the always-distinguished upper primary_map and takes the slow
+   path.  The fast path also requires all 8 A bits of a's group to be
+   valid. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV2) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V2_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(62);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      return 0xFFFF0000 
+             |  
+             (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V2_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store the low 16 bits of vbytes as the V
+   bits for the 2 bytes at a.  Same odd-address trap as LOADV2. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV2) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V2_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(63);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V2_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+/* Generated-code helper: load the V bits for the byte at a, in the
+   low 8 bits of the result; the top 24 bits are marked valid.  No
+   alignment trick needed for single bytes.  Fast path requires all
+   8 A bits of a's group to be valid. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV1) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V1_SLOWLY(a);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(64);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      return 0xFFFFFF00
+             |
+             (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V1_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store the low 8 bits of vbytes as the V
+   bits for the byte at a. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV1) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V1_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(65);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V1_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Fallback functions to handle cases that the above    ---*/
+/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage.           ---*/
+/*------------------------------------------------------------*/
+
+/* Slow-path 4-byte V load: handles misalignment and partially
+   addressible words.  Assembles the result little-endian-ly from
+   per-byte shadow state. */
+static UInt vgmext_rd_V4_SLOWLY ( Addr a )
+{
+   Bool a0ok, a1ok, a2ok, a3ok;
+   UInt vb0, vb1, vb2, vb3;
+
+   PROF_EVENT(70);
+
+   /* First establish independently the addressibility of the 4 bytes
+      involved. */
+   a0ok = get_abit(a+0) == VGM_BIT_VALID;
+   a1ok = get_abit(a+1) == VGM_BIT_VALID;
+   a2ok = get_abit(a+2) == VGM_BIT_VALID;
+   a3ok = get_abit(a+3) == VGM_BIT_VALID;
+
+   /* Also get the validity bytes for the address. */
+   vb0 = (UInt)get_vbyte(a+0);
+   vb1 = (UInt)get_vbyte(a+1);
+   vb2 = (UInt)get_vbyte(a+2);
+   vb3 = (UInt)get_vbyte(a+3);
+
+   /* Now distinguish 3 cases */
+
+   /* Case 1: the address is completely valid, so:
+      - no addressing error
+      - return V bytes as read from memory
+   */
+   if (a0ok && a1ok && a2ok && a3ok) {
+      UInt vw = VGM_WORD_INVALID;
+      vw <<= 8; vw |= vb3;
+      vw <<= 8; vw |= vb2;
+      vw <<= 8; vw |= vb1;
+      vw <<= 8; vw |= vb0;
+      return vw;
+   }
+
+   /* Case 2: the address is completely invalid.  
+      - emit addressing error
+      - return V word indicating validity.  
+      This sounds strange, but if we make loads from invalid addresses 
+      give invalid data, we also risk producing a number of confusing
+      undefined-value errors later, which confuses the fact that the
+      error arose in the first place from an invalid address. 
+   */
+   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
+   if (!SK_(clo_partial_loads_ok) 
+       || ((a & 3) != 0)
+       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
+      SK_(record_address_error)( a, 4, False );
+      return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) 
+             | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID;
+   }
+
+   /* Case 3: the address is partially valid.  
+      - no addressing error
+      - returned V word is invalid where the address is invalid, 
+        and contains V bytes from memory otherwise. 
+      Case 3 is only allowed if SK_(clo_partial_loads_ok) is True
+      (which is the default), and the address is 4-aligned.  
+      If not, Case 2 will have applied.
+   */
+   vg_assert(SK_(clo_partial_loads_ok));
+   {
+      UInt vw = VGM_WORD_INVALID;
+      vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID);
+      return vw;
+   }
+}
+
+/* Slow-path 4-byte V store.  The V bytes are written regardless;
+   an address error (if any) is reported afterwards. */
+static void vgmext_wr_V4_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(71);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+2) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+3) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+3, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 4, True );
+}
+
+/* Slow-path 2-byte V load.  On address error the low 16 bits are
+   forced valid (see case 2 rationale in vgmext_rd_V4_SLOWLY); the
+   unused upper half stays marked invalid. */
+static UInt vgmext_rd_V2_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   UInt vw   = VGM_WORD_INVALID;
+   Bool aerr = False;
+   PROF_EVENT(72);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* Fetch the V bytes, remembering to do it little-endian-ly. */
+   vw <<= 8; vw |= (UInt)get_vbyte(a+1);
+   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      SK_(record_address_error)( a, 2, False );
+      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
+           | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID);
+   }
+   return vw;   
+}
+
+/* Slow-path 2-byte V store; V bytes written unconditionally, then
+   any address error is reported. */
+static void vgmext_wr_V2_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(73);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+1, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 2, True );
+}
+
+/* Slow-path 1-byte V load.  On address error the low 8 bits are
+   forced valid; the unused upper 24 bits stay marked invalid. */
+static UInt vgmext_rd_V1_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   UInt vw   = VGM_WORD_INVALID;
+   Bool aerr = False;
+   PROF_EVENT(74);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* Fetch the V byte. */
+   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      SK_(record_address_error)( a, 1, False );
+      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
+           | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID);
+   }
+   return vw;   
+}
+
+/* Slow-path 1-byte V store; V byte written unconditionally, then
+   any address error is reported. */
+static void vgmext_wr_V1_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(75);
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 1, True );
+}
+
+
+/* ---------------------------------------------------------------------
+   Called from generated code, or from the assembly helpers.
+   Handlers for value check failures.
+   ------------------------------------------------------------------ */
+
+/* Called when a value check fails on 0/1/2/4 bytes of data; the
+   argument to record_value_error is the access size (0 denotes a
+   condition-code / flags check). */
+void SK_(helperc_value_check0_fail) ( void )
+{
+   SK_(record_value_error) ( 0 );
+}
+
+void SK_(helperc_value_check1_fail) ( void )
+{
+   SK_(record_value_error) ( 1 );
+}
+
+void SK_(helperc_value_check2_fail) ( void )
+{
+   SK_(record_value_error) ( 2 );
+}
+
+void SK_(helperc_value_check4_fail) ( void )
+{
+   SK_(record_value_error) ( 4 );
+}
+
+
+/* ---------------------------------------------------------------------
+   FPU load and store checks, called from generated code.
+   ------------------------------------------------------------------ */
+
+/* FPU load check: ensure [addr, addr+size) is addressible and valid.
+   Sizes 4 and 8 are handled inline when aligned; everything else
+   defers to fpu_read_check_SLOWLY. */
+__attribute__ ((regparm(2)))
+void SK_(fpu_read_check) ( Addr addr, Int size )
+{
+   /* Ensure the read area is both addressible and valid (ie,
+      readable).  If there's an address error, don't report a value
+      error too; but if there isn't an address error, check for a
+      value error. 
+
+      Try to be reasonably fast on the common case; wimp out and defer
+      to fpu_read_check_SLOWLY for everything else.  */
+
+   SecMap* sm;
+   UInt    sm_off, v_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(80);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_read_check_SLOWLY ( addr, size );
+#  else
+
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(81);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible. */
+      v_off = addr & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow4;
+      /* Properly aligned, addressible and with valid data. */
+      return;
+     slow4:
+      fpu_read_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(82);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible. */
+      v_off = addr & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow8;
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      v_off = addr4 & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow8;
+      /* Both halves properly aligned, addressible and with valid
+         data. */
+      return;
+     slow8:
+      fpu_read_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(83);
+      fpu_read_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   if (size == 10) {
+      PROF_EVENT(84);
+      fpu_read_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(84); /* XXX assign correct event number */
+      /* Fixed: previously passed a hard-coded 28 here, so a 108-byte
+         access (FSAVE-style area) only had its first 28 bytes
+         checked.  Pass the real size through. */
+      fpu_read_check_SLOWLY ( addr, size );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("vgmext_fpu_read_check: unhandled size");
+#  endif
+}
+
+
+/* FPU store check: ensure [addr, addr+size) is addressible, and mark
+   it valid if so.  Sizes 4 and 8 are handled inline when aligned;
+   everything else defers to fpu_write_check_SLOWLY. */
+__attribute__ ((regparm(2)))
+void SK_(fpu_write_check) ( Addr addr, Int size )
+{
+   /* Ensure the written area is addressible, and moan if otherwise.
+      If it is addressible, make it valid, otherwise invalid. 
+   */
+
+   SecMap* sm;
+   UInt    sm_off, v_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(85);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_write_check_SLOWLY ( addr, size );
+#  else
+
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(86);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible.  Make valid. */
+      v_off = addr & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      return;
+     slow4:
+      fpu_write_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(87);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible.  Make valid. */
+      v_off = addr & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      v_off = addr4 & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      /* Properly aligned, addressible and with valid data. */
+      return;
+     slow8:
+      fpu_write_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(88);
+      fpu_write_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   if (size == 10) {
+      PROF_EVENT(89);
+      fpu_write_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(89); /* XXX assign correct event number */
+      /* Fixed: previously passed a hard-coded 28 here, so a 108-byte
+         store (FSAVE-style area) only had its first 28 bytes checked
+         and marked valid.  Pass the real size through. */
+      fpu_write_check_SLOWLY ( addr, size );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("vgmext_fpu_write_check: unhandled size");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Slow, general cases for FPU load and store checks.
+   ------------------------------------------------------------------ */
+
+/* Generic (slow-path) FPU load check.  Walks the range one byte at a
+   time, testing both addressibility (A bits) and definedness (V
+   bytes).  If there's an addr error, don't report a value error even
+   if one exists, since the value error is then uninteresting. */
+
+void fpu_read_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Bool aerr = False;   /* saw at least one unaddressible byte */
+   Bool verr = False;   /* saw at least one undefined byte */
+   PROF_EVENT(90);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(91);
+      if (get_abit(addr+i) != VGM_BIT_VALID)
+         aerr = True;
+      if (get_vbyte(addr+i) != VGM_BYTE_VALID)
+         verr = True;
+   }
+
+   if (aerr) {
+      /* False == this is a read, not a write. */
+      SK_(record_address_error)( addr, size, False );
+   } else {
+     if (verr)
+        SK_(record_value_error)( size );
+   }
+}
+
+
+/* Generic (slow-path) FPU store check.  Test for addr errors.  Valid
+   addresses are given valid values, and invalid addresses invalid
+   values; a single address error covering the whole range is
+   recorded if any byte was unaddressible. */
+
+void fpu_write_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Addr a_here;
+   Bool a_ok;
+   Bool aerr = False;   /* saw at least one unaddressible byte */
+   PROF_EVENT(92);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(93);
+      a_here = addr+i;
+      a_ok = get_abit(a_here) == VGM_BIT_VALID;
+      if (a_ok) {
+	set_vbyte(a_here, VGM_BYTE_VALID);
+      } else {
+	set_vbyte(a_here, VGM_BYTE_INVALID);
+        aerr = True;
+      }
+   }
+   if (aerr) {
+      /* True == this is a write. */
+      SK_(record_address_error)( addr, size, True );
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks info                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Stash the ExeContext describing where a block was allocated (or
+   freed) in the skin-private slot of its shadow chunk. */
+static __inline__
+void set_where( ShadowChunk* sc, ExeContext* ec )
+{
+   sc->skin_extra[0] = (UInt)ec;
+}
+
+/* Retrieve the ExeContext previously stored by set_where(). */
+static __inline__
+ExeContext *get_where( ShadowChunk* sc )
+{
+   return (ExeContext*)sc->skin_extra[0];
+}
+
+/* Core callback: a shadow chunk has just been created for a new
+   allocation; record the allocating thread's context in it. */
+void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst )
+{
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+}
+
+/*------------------------------------------------------------*/
+/*--- Postponing free()ing                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Holds blocks after freeing. */
+static ShadowChunk* vg_freed_list_start   = NULL;  /* oldest queued block */
+static ShadowChunk* vg_freed_list_end     = NULL;  /* newest queued block */
+static Int          vg_freed_list_volume  = 0;     /* total bytes queued */
+
+/* Debug helper: number of chunks currently on the freed-blocks
+   queue.  Unused in normal builds, hence the attribute. */
+static __attribute__ ((unused))
+       Int count_freelist ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n++;
+   return n;
+}
+
+/* Debug helper: assert that the cached queue volume matches the sum
+   of the queued chunks' sizes.  Unused in normal builds. */
+static __attribute__ ((unused))
+       void freelist_sanity ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   /* VG_(printf)("freelist sanity\n"); */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n += sc->size;
+   vg_assert(n == vg_freed_list_volume);
+}
+
+/* Put a shadow chunk on the freed blocks queue, possibly freeing up
+   some of the oldest blocks in the queue at the same time.  The queue
+   is a singly-linked FIFO: blocks enter at vg_freed_list_end and are
+   eventually released from vg_freed_list_start, so accesses to
+   recently-freed memory can still be diagnosed for a while. */
+static void add_to_freed_queue ( ShadowChunk* sc )
+{
+   ShadowChunk* sc1;
+
+   /* Put it at the end of the freed list */
+   if (vg_freed_list_end == NULL) {
+      vg_assert(vg_freed_list_start == NULL);
+      vg_freed_list_end = vg_freed_list_start = sc;
+      vg_freed_list_volume = sc->size;
+   } else {    
+      vg_assert(vg_freed_list_end->next == NULL);
+      vg_freed_list_end->next = sc;
+      vg_freed_list_end = sc;
+      vg_freed_list_volume += sc->size;
+   }
+   sc->next = NULL;
+
+   /* Release enough of the oldest blocks to bring the free queue
+      volume below vg_clo_freelist_vol. */
+   
+   while (vg_freed_list_volume > SK_(clo_freelist_vol)) {
+      /* freelist_sanity(); */
+      vg_assert(vg_freed_list_start != NULL);
+      vg_assert(vg_freed_list_end != NULL);
+
+      sc1 = vg_freed_list_start;
+      vg_freed_list_volume -= sc1->size;
+      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
+      vg_assert(vg_freed_list_volume >= 0);
+
+      if (vg_freed_list_start == vg_freed_list_end) {
+         /* Queue is now empty. */
+         vg_freed_list_start = vg_freed_list_end = NULL;
+      } else {
+         vg_freed_list_start = sc1->next;
+      }
+      sc1->next = NULL; /* just paranoia */
+      VG_(freeShadowChunk) ( sc1 );
+   }
+}
+
+/* Return the first shadow chunk on the freed-blocks queue satisfying
+   the predicate p, or NULL if none does.  Used to diagnose accesses
+   to recently-freed memory. */
+ShadowChunk* SK_(any_matching_freed_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
+{
+   ShadowChunk* sc;
+
+   /* Walk the queue oldest-first.  (If --freelist-vol=0 the queue is
+      always empty, so nothing is ever found.) */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      if (p(sc))
+         return sc;
+
+   return NULL;
+}
+
+/* Alternative free(): instead of releasing the block immediately,
+   record where it was freed and park it on the freed-blocks queue
+   so use-after-free accesses can be reported with the free site. */
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   /* Record where freed */
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+
+   /* Put it out of harm's way for a while. */
+   add_to_freed_queue ( sc );
+}
+
+/*------------------------------------------------------------*/
+/*--- Low-level address-space scanning, for the leak       ---*/
+/*--- detector.                                            ---*/
+/*------------------------------------------------------------*/
+
+/* Jump target for escaping a faulting page probe during the memory
+   scan below. */
+static 
+jmp_buf memscan_jmpbuf;
+
+/* SIGSEGV/SIGBUS handler installed while scanning: bail out of the
+   faulting page read via longjmp rather than dying. */
+static
+void vg_scan_all_valid_memory_sighandler ( Int sigNo )
+{
+   __builtin_longjmp(memscan_jmpbuf, 1);
+}
+
+/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
+   space and pass the addresses and values of all addressible,
+   defined, aligned words to notify_word.  This is the basis for the
+   leak detector.  Returns the number of calls made to notify_word.  */
+UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
+{
+   /* All volatile, because some gccs seem paranoid about longjmp(). */
+   volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified;
+   volatile Addr pageBase, addr;
+   volatile SecMap* sm;
+   volatile UChar abits;
+   volatile UInt page_first_word;
+
+   vki_ksigaction sigbus_saved;
+   vki_ksigaction sigbus_new;
+   vki_ksigaction sigsegv_saved;
+   vki_ksigaction sigsegv_new;
+   vki_ksigset_t  blockmask_saved;
+   vki_ksigset_t  unblockmask_new;
+
+   /* Temporarily install a new sigsegv and sigbus handler, and make
+      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
+      first two can never be blocked anyway?)  */
+
+   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigbus_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
+   vg_assert(res == 0);
+
+   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigsegv_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
+   /* NOTE(review): the `0+0...' expressions below are all just 0;
+      presumably written so each assert's expression text is unique --
+      confirm before "tidying" them. */
+   vg_assert(res == 0+0);
+
+   res =  VG_(ksigemptyset)( &unblockmask_new );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
+   vg_assert(res == 0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
+   vg_assert(res == 0+0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
+   vg_assert(res == 0+0+0+0+0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
+   vg_assert(res == 0+0+0+0+0+0);
+
+   /* The signal handlers are installed.  Actually do the memory scan. */
+   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
+   vg_assert(numPages == 1048576);
+   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
+
+   nWordsNotified = 0;
+
+   for (page = 0; page < numPages; page++) {
+      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
+      primaryMapNo = pageBase >> 16;
+      sm = primary_map[primaryMapNo];
+      /* Skip 64KB regions still mapped to the distinguished (wholly
+         inaccessible) secondary map -- nothing valid can be there. */
+      if (IS_DISTINGUISHED_SM(sm)) continue;
+      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
+         /* try this ... */
+         page_first_word = * (volatile UInt*)pageBase;
+         /* we get here if we didn't get a fault */
+         /* Scan the page */
+         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
+            abits  = get_abits4_ALIGNED(addr);
+            vbytes = get_vbytes4_ALIGNED(addr);
+            if (abits == VGM_NIBBLE_VALID 
+                && vbytes == VGM_WORD_VALID) {
+               nWordsNotified++;
+               notify_word ( addr, *(UInt*)addr );
+	    }
+         }
+      } else {
+         /* We get here if reading the first word of the page caused a
+            fault, which in turn caused the signal handler to longjmp.
+            Ignore this page. */
+         if (0)
+         VG_(printf)(
+            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
+            (void*)pageBase 
+         );
+      }
+   }
+
+   /* Restore signal state to whatever it was before. */
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
+   vg_assert(res == 0 +0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
+   vg_assert(res == 0 +0 +0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
+   vg_assert(res == 0 +0 +0 +0);
+
+   return nWordsNotified;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
+/*------------------------------------------------------------*/
+
+/* A block is either 
+   -- Proper-ly reached; a pointer to its start has been found
+   -- Interior-ly reached; only an interior pointer to it has been found
+   -- Unreached; so far, no pointers to any part of it have been found. 
+*/
+typedef 
+   enum { Unreached, Interior, Proper } 
+   Reachedness;
+
+/* A block record, used for generating err msgs.  One record summarises
+   all lost blocks sharing an allocation point and a Reachedness. */
+typedef
+   struct _LossRecord {
+      struct _LossRecord* next;
+      /* Where these lost blocks were allocated. */
+      ExeContext*  allocated_at;
+      /* Their reachability. */
+      Reachedness  loss_mode;
+      /* Number of blocks and total # bytes involved. */
+      UInt         total_bytes;
+      UInt         num_blocks;
+   }
+   LossRecord;
+
+
+/* Find the i such that ptr points at or inside the block described by
+   shadows[i].  Return -1 if none found.  This assumes that shadows[]
+   has been sorted on the ->data field. */
+
+#ifdef VG_DEBUG_LEAKCHECK
+/* Used to sanity-check the fast binary-search mechanism: the same
+   lookup done by O(n) linear scan. */
+static Int find_shadow_for_OLD ( Addr          ptr, 
+                                 ShadowChunk** shadows,
+                                 Int           n_shadows )
+
+{
+   Int  i;
+   Addr a_lo, a_hi;
+   PROF_EVENT(70);
+   for (i = 0; i < n_shadows; i++) {
+      PROF_EVENT(71);
+      /* Block i occupies [a_lo, a_hi] inclusive. */
+      a_lo = shadows[i]->data;
+      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
+      if (a_lo <= ptr && ptr <= a_hi)
+         return i;
+   }
+   return -1;
+}
+#endif
+
+
+/* Binary-search shadows[] (sorted on ->data) for the block containing
+   ptr; returns its index, or -1 if ptr is inside no block.  With
+   VG_DEBUG_LEAKCHECK, cross-checks against the linear version. */
+static Int find_shadow_for ( Addr          ptr, 
+                             ShadowChunk** shadows,
+                             Int           n_shadows )
+{
+   Addr a_mid_lo, a_mid_hi;
+   Int lo, mid, hi, retVal;
+   PROF_EVENT(70);
+   /* VG_(printf)("find shadow for %p = ", ptr); */
+   retVal = -1;
+   lo = 0;
+   hi = n_shadows-1;
+   while (True) {
+      PROF_EVENT(71);
+
+      /* invariant: current unsearched space is from lo to hi,
+         inclusive. */
+      if (lo > hi) break; /* not found */
+
+      mid      = (lo + hi) / 2;
+      /* Block mid occupies [a_mid_lo, a_mid_hi] inclusive. */
+      a_mid_lo = shadows[mid]->data;
+      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
+
+      if (ptr < a_mid_lo) {
+         hi = mid-1;
+         continue;
+      } 
+      if (ptr > a_mid_hi) {
+         lo = mid+1;
+         continue;
+      }
+      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
+      retVal = mid;
+      break;
+   }
+
+#  ifdef VG_DEBUG_LEAKCHECK
+   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
+#  endif
+   /* VG_(printf)("%d\n", retVal); */
+   return retVal;
+}
+
+
+
+/* Sort shadows[0 .. n_shadows-1] into ascending order of their ->data
+   (block start) address, in place, using Shell sort with the h := 3h+1
+   gap sequence (1, 4, 13, 40, ...).  Required so find_shadow_for can
+   binary-search the array. */
+static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
+{
+   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                      9841, 29524, 88573, 265720,
+                      797161, 2391484 };
+   Int          lo = 0;
+   Int          hi = n_shadows-1;
+   Int          i, j, h, bigN, hp;
+   ShadowChunk* v;
+
+   PROF_EVENT(72);
+   bigN = hi - lo + 1; if (bigN < 2) return;
+   /* Bug fix: bound the gap search so that a very large number of
+      blocks (bigN > incs[13]) cannot read past the end of incs[];
+      in that case we simply start from the largest gap available. */
+   hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
+
+   /* Gapped insertion sort, largest gap first. */
+   for (; hp >= 0; hp--) {
+      PROF_EVENT(73);
+      h = incs[hp];
+      i = lo + h;
+      while (1) {
+         PROF_EVENT(74);
+         if (i > hi) break;
+         v = shadows[i];
+         j = i;
+         while (shadows[j-h]->data > v->data) {
+            PROF_EVENT(75);
+            shadows[j] = shadows[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         shadows[j] = v;
+         i++;
+      }
+   }
+}
+
+/* Globals, for the callback used by SK_(detect_memory_leaks). */
+
+static ShadowChunk** vglc_shadows;        /* all chunks, sorted on ->data */
+static Int           vglc_n_shadows;      /* entries in vglc_shadows */
+static Reachedness*  vglc_reachedness;    /* per-chunk verdict, same index */
+static Addr          vglc_min_mallocd_addr;  /* lowest mallocd byte */
+static Addr          vglc_max_mallocd_addr;  /* highest mallocd byte */
+
+/* Callback handed to VG_(scan_all_valid_memory): given the word
+   word_at_a found at address a, upgrade the reachability of any
+   malloc'd block that word points at (Proper if it points at the
+   block start, otherwise Interior). */
+static 
+void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
+{
+   Int  sh_no;
+   Addr ptr;
+
+   /* Rule out some known causes of bogus pointers.  Mostly these do
+      not cause much trouble because only a few false pointers can
+      ever lurk in these places.  This mainly stops it reporting that
+      blocks are still reachable in stupid test programs like this
+
+         int main (void) { char* a = malloc(100); return 0; }
+
+      which people seem inordinately fond of writing, for some reason.  
+
+      Note that this is a complete kludge.  It would be better to
+      ignore any addresses corresponding to valgrind.so's .bss and
+      .data segments, but I cannot think of a reliable way to identify
+      where the .bss segment has been put.  If you can, drop me a
+      line.  
+   */
+   if (VG_(within_stack)(a))                return;
+   if (VG_(within_m_state_static)(a))       return;
+   if (a == (Addr)(&vglc_min_mallocd_addr)) return;
+   if (a == (Addr)(&vglc_max_mallocd_addr)) return;
+
+   /* OK, let's get on and do something Useful for a change. */
+
+   ptr = (Addr)word_at_a;
+   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
+      /* Might be legitimate; we'll have to investigate further. */
+      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
+      if (sh_no != -1) {
+         /* Found a block at/into which ptr points. */
+         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
+         vg_assert(ptr < vglc_shadows[sh_no]->data 
+                         + vglc_shadows[sh_no]->size);
+         /* Decide whether Proper-ly or Interior-ly reached. */
+         if (ptr == vglc_shadows[sh_no]->data) {
+            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
+            vglc_reachedness[sh_no] = Proper;
+         } else {
+            /* Never downgrade Proper to Interior. */
+            if (vglc_reachedness[sh_no] == Unreached)
+               vglc_reachedness[sh_no] = Interior;
+         }
+      }
+   }
+}
+
+
+/* Top-level leak check: sort all live malloc'd blocks, scan all
+   valid memory for pointers into them, classify each block as
+   definitely lost / possibly lost / still reachable, then merge
+   blocks sharing an allocation point into loss records and print
+   them (smallest total first), followed by a summary. */
+void SK_(detect_memory_leaks) ( void )
+{
+   Int    i;
+   Int    blocks_leaked, bytes_leaked;
+   Int    blocks_dubious, bytes_dubious;
+   Int    blocks_reachable, bytes_reachable;
+   Int    n_lossrecords;
+   UInt   bytes_notified;
+   
+   LossRecord*  errlist;
+   LossRecord*  p;
+
+   PROF_EVENT(76);
+
+   /* VG_(get_malloc_shadows) allocates storage for shadows */
+   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
+   if (vglc_n_shadows == 0) {
+      vg_assert(vglc_shadows == NULL);
+      VG_(message)(Vg_UserMsg, 
+                   "No malloc'd blocks -- no leaks are possible.\n");
+      return;
+   }
+
+   VG_(message)(Vg_UserMsg, 
+                "searching for pointers to %d not-freed blocks.", 
+                vglc_n_shadows );
+   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
+
+   /* Sanity check; assert that the blocks are now in order and that
+      they don't overlap.  NOTE(review): the second assert's strict <
+      also forbids exactly-adjacent blocks -- presumably guaranteed by
+      allocator red-zones/overhead; confirm. */
+   for (i = 0; i < vglc_n_shadows-1; i++) {
+      vg_assert( ((Addr)vglc_shadows[i]->data)
+                 < ((Addr)vglc_shadows[i+1]->data) );
+      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
+                 < ((Addr)vglc_shadows[i+1]->data) );
+   }
+
+   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
+   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
+                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
+
+   vglc_reachedness 
+      = VG_(malloc)( vglc_n_shadows * sizeof(Reachedness) );
+   for (i = 0; i < vglc_n_shadows; i++)
+      vglc_reachedness[i] = Unreached;
+
+   /* Do the scan of memory. */
+   bytes_notified
+       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
+         * VKI_BYTES_PER_WORD;
+
+   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
+
+   /* Tally the three categories. */
+   blocks_leaked    = bytes_leaked    = 0;
+   blocks_dubious   = bytes_dubious   = 0;
+   blocks_reachable = bytes_reachable = 0;
+
+   for (i = 0; i < vglc_n_shadows; i++) {
+      if (vglc_reachedness[i] == Unreached) {
+         blocks_leaked++;
+         bytes_leaked += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Interior) {
+         blocks_dubious++;
+         bytes_dubious += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Proper) {
+         blocks_reachable++;
+         bytes_reachable += vglc_shadows[i]->size;
+      }
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+
+
+   /* Common up the lost blocks so we can print sensible error
+      messages. */
+
+   n_lossrecords = 0;
+   errlist       = NULL;
+   for (i = 0; i < vglc_n_shadows; i++) {
+     
+      /* 'where' stored in 'skin_extra' field */
+      ExeContext* where = get_where ( vglc_shadows[i] );
+
+      /* Look for an existing record with the same Reachedness and
+         (up to clo_leak_resolution) the same allocation context. */
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->loss_mode == vglc_reachedness[i]
+             && VG_(eq_ExeContext) ( SK_(clo_leak_resolution),
+                                     p->allocated_at, 
+                                     where) ) {
+            break;
+	 }
+      }
+      if (p != NULL) {
+         p->num_blocks  ++;
+         p->total_bytes += vglc_shadows[i]->size;
+      } else {
+         n_lossrecords ++;
+         p = VG_(malloc)(sizeof(LossRecord));
+         p->loss_mode    = vglc_reachedness[i];
+         p->allocated_at = where;
+         p->total_bytes  = vglc_shadows[i]->size;
+         p->num_blocks   = 1;
+         p->next         = errlist;
+         errlist         = p;
+      }
+   }
+   
+   /* Print the records in increasing order of total_bytes; num_blocks
+      is zeroed to mark a record as already printed (or suppressed). */
+   for (i = 0; i < n_lossrecords; i++) {
+      LossRecord* p_min = NULL;
+      UInt        n_min = 0xFFFFFFFF;
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->num_blocks > 0 && p->total_bytes < n_min) {
+            n_min = p->total_bytes;
+            p_min = p;
+         }
+      }
+      vg_assert(p_min != NULL);
+
+      if ( (!SK_(clo_show_reachable)) && p_min->loss_mode == Proper) {
+         p_min->num_blocks = 0;
+         continue;
+      }
+
+      VG_(message)(Vg_UserMsg, "");
+      VG_(message)(
+         Vg_UserMsg,
+         "%d bytes in %d blocks are %s in loss record %d of %d",
+         p_min->total_bytes, p_min->num_blocks,
+         p_min->loss_mode==Unreached ? "definitely lost" :
+            (p_min->loss_mode==Interior ? "possibly lost"
+                                        : "still reachable"),
+         i+1, n_lossrecords
+      );
+      VG_(pp_ExeContext)(p_min->allocated_at);
+      p_min->num_blocks = 0;
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
+   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+   if (!SK_(clo_show_reachable)) {
+      VG_(message)(Vg_UserMsg, 
+         "Reachable blocks (those to which a pointer was found) are not shown.");
+      VG_(message)(Vg_UserMsg, 
+         "To see them, rerun with: --show-reachable=yes");
+   }
+   VG_(message)(Vg_UserMsg, "");
+
+   /* NOTE(review): the LossRecords on errlist are not freed here --
+      a (bounded, per-leak-check) leak of the leak checker itself. */
+   VG_(free) ( vglc_shadows );
+   VG_(free) ( vglc_reachedness );
+}
+
+
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+Bool SK_(cheap_sanity_check) ( void )
+{
+   /* Cheap invariant: the bottom and top 64KB chunks of the address
+      space must still map to the distinguished secondary map; if
+      either has been replaced, something has gone badly wrong. */
+   Bool lowest_ok  = IS_DISTINGUISHED_SM(primary_map[0]);
+   Bool highest_ok = IS_DISTINGUISHED_SM(primary_map[65535]);
+   return (lowest_ok && highest_ok) ? True : False;
+}
+
+/* Expensive invariants: the shared all-invalid secondary map must be
+   untouched, and the upper 3GB of the primary map must still point at
+   it.  Returns False on the first violation found. */
+Bool SK_(expensive_sanity_check) ( void )
+{
+   Int i;
+
+   /* Make sure nobody changed the distinguished secondary. */
+   /* 8192 a-bit bytes cover 64K addresses (8 per byte)... */
+   for (i = 0; i < 8192; i++)
+      if (distinguished_secondary_map.abits[i] != VGM_BYTE_INVALID)
+         return False;
+
+   /* ... and 65536 v-bytes, one per address. */
+   for (i = 0; i < 65536; i++)
+      if (distinguished_secondary_map.vbyte[i] != VGM_BYTE_INVALID)
+         return False;
+
+   /* Make sure that the upper 3/4 of the primary map hasn't
+      been messed with. */
+   for (i = 65536; i < 262144; i++)
+      if (primary_map[i] != & distinguished_secondary_map)
+         return False;
+
+   return True;
+}
+      
+/* ---------------------------------------------------------------------
+   Debugging machinery (turn on to debug).  Something of a mess.
+   Everything below is compiled out (#if 0) in normal builds.
+   ------------------------------------------------------------------ */
+
+#if 0
+/* Print the value tags on the 8 integer registers & flag reg. */
+
+/* Render x as 32 '0'/'1' chars, a space between each byte; str must
+   hold at least 36 bytes (32 bits + 3 spaces + NUL). */
+static void uint_to_bits ( UInt x, Char* str )
+{
+   Int i;
+   Int w = 0;
+   /* str must point to a space of at least 36 bytes. */
+   for (i = 31; i >= 0; i--) {
+      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
+      if (i == 24 || i == 16 || i == 8)
+         str[w++] = ' ';
+   }
+   str[w++] = 0;
+   vg_assert(w == 36);
+}
+
+/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
+   state table. */
+
+static void vg_show_reg_tags ( void )
+{
+   Char buf1[36];
+   Char buf2[36];
+   UInt z_eax, z_ebx, z_ecx, z_edx, 
+        z_esi, z_edi, z_ebp, z_esp, z_eflags;
+
+   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
+   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
+   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
+   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
+   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
+   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
+   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
+   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
+   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   
+   uint_to_bits(z_eflags, buf1);
+   /* NOTE(review): "%" below is missing its conversion character --
+      should presumably be "%s".  Harmless while this is #if 0'd. */
+   VG_(message)(Vg_DebugMsg, "efl %\n", buf1);
+
+   uint_to_bits(z_eax, buf1);
+   uint_to_bits(z_ebx, buf2);
+   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
+
+   uint_to_bits(z_ecx, buf1);
+   uint_to_bits(z_edx, buf2);
+   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
+
+   uint_to_bits(z_esi, buf1);
+   uint_to_bits(z_edi, buf2);
+   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
+
+   uint_to_bits(z_ebp, buf1);
+   uint_to_bits(z_esp, buf2);
+   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
+}
+
+
+/* For debugging only.  Scan the address space and touch all allegedly
+   addressible words.  Useful for establishing where Valgrind's idea of
+   addressibility has diverged from what the kernel believes. */
+
+static 
+void zzzmemscan_notify_word ( Addr a, UInt w )
+{
+}
+
+void zzzmemscan ( void )
+{
+   Int n_notifies
+      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
+   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
+}
+#endif
+
+
+
+
+#if 0
+static Int zzz = 0;
+
+/* Debug aid: print a basic-block counter, register tags and a
+   disassembly of the next BB.  NOTE(review): the vg_show_reg_tags
+   call below has an unbalanced parenthesis and a spurious argument
+   (the function takes none) -- this would not compile if the
+   surrounding #if 0 were enabled. */
+void show_bb ( Addr eip_next )
+{
+   VG_(printf)("[%4d] ", zzz);
+   vg_show_reg_tags( &VG_(m_shadow );
+   VG_(translate) ( eip_next, NULL, NULL, NULL );
+}
+#endif /* 0 */
+
+/*------------------------------------------------------------*/
+/*--- Syscall wrappers                                     ---*/
+/*------------------------------------------------------------*/
+
+void* SK_(pre_syscall)  ( ThreadId tid, UInt syscallno, Bool isBlocking )
+{
+   /* Snapshot the cheap sanity state before the syscall runs; the
+      result is handed back to SK_(post_syscall) as pre_result. */
+   Bool sane_before = SK_(cheap_sanity_check)();
+   return (void*)(Int)sane_before;
+}
+
+/* Post-syscall hook: re-run the cheap sanity check and panic if the
+   invariants held before the syscall (per pre_result, produced by
+   SK_(pre_syscall)) but not after -- i.e. the syscall wrapper
+   probably corrupted shadow memory. */
+void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                           void* pre_result, Int res, Bool isBlocking )
+{
+   Int  sane_before_call = (Int)pre_result;
+   Bool sane_after_call  = SK_(cheap_sanity_check)();
+
+   if ((Int)sane_before_call && (!sane_after_call)) {
+      VG_(message)(Vg_DebugMsg, "post-syscall: ");
+      VG_(message)(Vg_DebugMsg,
+                   "probable sanity check failure for syscall number %d\n",
+                   syscallno );
+      VG_(panic)("aborting due to the above ... bye!");
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+
+/* Tell the core what shadow (V-bit) values to write for a freshly
+   written general register and for %eflags: fully valid in both. */
+void SK_(written_shadow_regs_values)( UInt* gen_reg_value, UInt* eflags_value )
+{
+   *gen_reg_value = VGM_WORD_VALID;
+   *eflags_value  = VGM_EFLAGS_VALID;
+}
+
+/* Parse one skin-specific command-line option.  Returns True iff the
+   option was recognised (and the corresponding SK_(clo_*) global
+   updated); False lets the core report it as unknown. */
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+#  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
+#  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
+
+   if      (STREQ(arg, "--partial-loads-ok=yes"))
+      SK_(clo_partial_loads_ok) = True;
+   else if (STREQ(arg, "--partial-loads-ok=no"))
+      SK_(clo_partial_loads_ok) = False;
+
+   /* "--freelist-vol=" is 15 chars, hence the 15s below. */
+   else if (STREQN(15, arg, "--freelist-vol=")) {
+      SK_(clo_freelist_vol) = (Int)VG_(atoll)(&arg[15]);
+      /* Clamp negative volumes to zero (queue disabled). */
+      if (SK_(clo_freelist_vol) < 0) SK_(clo_freelist_vol) = 0;
+   }
+
+   else if (STREQ(arg, "--leak-check=yes"))
+      SK_(clo_leak_check) = True;
+   else if (STREQ(arg, "--leak-check=no"))
+      SK_(clo_leak_check) = False;
+
+   else if (STREQ(arg, "--leak-resolution=low"))
+      SK_(clo_leak_resolution) = Vg_LowRes;
+   else if (STREQ(arg, "--leak-resolution=med"))
+      SK_(clo_leak_resolution) = Vg_MedRes;
+   else if (STREQ(arg, "--leak-resolution=high"))
+      SK_(clo_leak_resolution) = Vg_HighRes;
+   
+   else if (STREQ(arg, "--show-reachable=yes"))
+      SK_(clo_show_reachable) = True;
+   else if (STREQ(arg, "--show-reachable=no"))
+      SK_(clo_show_reachable) = False;
+
+   else if (STREQ(arg, "--workaround-gcc296-bugs=yes"))
+      SK_(clo_workaround_gcc296_bugs) = True;
+   else if (STREQ(arg, "--workaround-gcc296-bugs=no"))
+      SK_(clo_workaround_gcc296_bugs) = False;
+
+   else if (STREQ(arg, "--check-addrVs=yes"))
+      SK_(clo_check_addrVs) = True;
+   else if (STREQ(arg, "--check-addrVs=no"))
+      SK_(clo_check_addrVs) = False;
+
+   else if (STREQ(arg, "--cleanup=yes"))
+      SK_(clo_cleanup) = True;
+   else if (STREQ(arg, "--cleanup=no"))
+      SK_(clo_cleanup) = False;
+
+   else
+      return False;
+
+   return True;
+
+#undef STREQ
+#undef STREQN
+}
+
+/* One-line summaries of the options accepted by
+   SK_(process_cmd_line_option), shown by the core's --help. */
+Char* SK_(usage)(void)
+{  
+   return  
+"    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
+"    --freelist-vol=<number>   volume of freed blocks queue [1000000]\n"
+"    --leak-check=no|yes       search for memory leaks at exit? [no]\n"
+"    --leak-resolution=low|med|high\n"
+"                              amount of bt merging in leak check [low]\n"
+"    --show-reachable=no|yes   show reachable blocks in leak check? [no]\n"
+"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
+"    --check-addrVs=no|yes     experimental lighterweight checking? [yes]\n"
+"                              yes == Valgrind's original behaviour\n"
+"\n"
+"    --cleanup=no|yes          improve after instrumentation? [yes]\n";
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+
+/* Skin initialisation, run before command-line processing: declare
+   what core services this skin needs, register the UCode helpers the
+   instrumenter calls into, hook the memory-state tracking events,
+   and set up shadow memory and profiling counters. */
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   /* NOTE(review): name is "valgrind" rather than "memcheck" --
+      presumably historical; confirm against the core's use of it. */
+   needs->name                    = "valgrind";
+   needs->description             = "a memory error detector";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+   needs->run_libc_freeres        = True;
+
+   /* One UInt of skin-private data per shadow chunk (the alloc/free
+      ExeContext stored by set_where/get_where). */
+   needs->sizeof_shadow_block     = 1;
+
+   needs->basic_block_discards    = False;
+   needs->shadow_regs             = True;
+   needs->command_line_options    = True;
+   needs->client_requests         = True;
+   needs->extended_UCode          = True;
+   needs->syscall_wrapper         = True;
+   needs->alternative_free        = True;
+   needs->sanity_checks           = True;
+
+   /* Compact helpers: the most frequently called ones. */
+   VG_(register_compact_helper)((Addr) & SK_(helper_value_check4_fail));
+   VG_(register_compact_helper)((Addr) & SK_(helper_value_check0_fail));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_STOREV4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_STOREV1));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_LOADV4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_LOADV1));
+
+   /* These two made non-compact because 2-byte transactions are rare. */
+   VG_(register_noncompact_helper)((Addr) & SK_(helperc_STOREV2));
+   VG_(register_noncompact_helper)((Addr) & SK_(helperc_LOADV2));
+   VG_(register_noncompact_helper)((Addr) & SK_(fpu_write_check));
+   VG_(register_noncompact_helper)((Addr) & SK_(fpu_read_check));
+   VG_(register_noncompact_helper)((Addr) & SK_(helper_value_check2_fail));
+   VG_(register_noncompact_helper)((Addr) & SK_(helper_value_check1_fail));
+
+   /* Events to track */
+   track->new_mem_startup       = & memcheck_new_mem_startup;
+   track->new_mem_heap          = & memcheck_new_mem_heap;
+   track->new_mem_stack         = & SK_(make_writable);
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & SK_(make_writable);
+   track->new_mem_brk           = & SK_(make_writable);
+   track->new_mem_mmap          = & memcheck_set_perms;
+   
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->copy_mem_remap        = & copy_address_range_state;
+   track->change_mem_mprotect   = & memcheck_set_perms;
+      
+   track->ban_mem_heap          = & SK_(make_noaccess);
+   track->ban_mem_stack         = & SK_(make_noaccess);
+
+   track->die_mem_heap          = & SK_(make_noaccess);
+   track->die_mem_stack         = & SK_(make_noaccess);
+   track->die_mem_stack_aligned = & make_noaccess_aligned; 
+   track->die_mem_stack_signal  = & SK_(make_noaccess); 
+   track->die_mem_brk           = & SK_(make_noaccess);
+   track->die_mem_munmap        = & SK_(make_noaccess); 
+
+   track->bad_free              = & SK_(record_free_error);
+   track->mismatched_free       = & SK_(record_freemismatch_error);
+
+   track->pre_mem_read          = & check_is_readable;
+   track->pre_mem_read_asciiz   = & check_is_readable_asciiz;
+   track->pre_mem_write         = & check_is_writable;
+   track->post_mem_write        = & SK_(make_readable);
+
+   init_shadow_memory();
+
+   init_prof_mem();
+
+   VGP_(register_profile_event) ( VgpSetMem,   "set-mem-perms" );
+   VGP_(register_profile_event) ( VgpCheckMem, "check-mem-perms" );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_memcheck.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
new file mode 100644
index 0000000..34f9643
--- /dev/null
+++ b/memcheck/mc_translate.c
@@ -0,0 +1,1470 @@
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: instrument UCode to perform       ---*/
+/*--- memory checking operations.                                  ---*/
+/*---                                      vg_memcheck_translate.c ---*/
+/*--------------------------------------------------------------------*/
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/* ---------------------------------------------------------------------
+   Template functions for extending UCode
+   ------------------------------------------------------------------ */
+
+/* Compare this with the restrictions on core instructions in
+   vg_translate.c:VG_(saneUInstr)().  Everything general said there applies
+   here too.
+*/
+/* Sanity-check one of MemCheck's extended UInstrs (the V-bit/tag
+   opcodes layered on top of core UCode).  Returns True iff the
+   instruction's fields are consistent for its opcode; panics on an
+   opcode this skin does not define. */
+Bool SK_(saneExtUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u)
+{
+// SSS: duplicating these macros really sucks
+#  define LIT0 (u->lit32 == 0)
+#  define LIT1 (!(LIT0))
+#  define LITm (u->tag1 == Literal ? True : LIT0 )
+#  define SZ0 (u->size == 0)
+#  define SZi (u->size == 4 || u->size == 2 || u->size == 1)
+#  define SZj (u->size == 4 || u->size == 2 || u->size == 1 || u->size == 0)
+#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
+#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
+#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
+#  define A1  (u->tag1 == ArchReg)
+#  define A2  (u->tag2 == ArchReg)
+#  define L1  (u->tag1 == Literal && u->val1 == 0)
+#  define Ls1 (u->tag1 == Lit16)
+#  define Ls3 (u->tag3 == Lit16)
+#  define TRL1 (TR1 || L1)
+#  define TRA1 (TR1 || A1)
+#  define N2  (u->tag2 == NoValue)
+#  define N3  (u->tag3 == NoValue)
+#  define COND0    (u->cond         == 0)
+#  define EXTRA4b0 (u->extra4b      == 0)
+#  define SG_WD0   (u->signed_widen == 0)
+#  define JMPKIND0 (u->jmpkind      == 0)
+#  define CCALL0   (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
+                    ( beforeLiveness                                       \
+                    ? u->regs_live_after == ALL_RREGS_LIVE                 \
+                    : True ))
+#  define XOTHER   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+
+   /* At most one of the three operands may be a Literal. */
+   Int n_lits = 0;
+   if (u->tag1 == Literal) n_lits++;
+   if (u->tag2 == Literal) n_lits++;
+   if (u->tag3 == Literal) n_lits++;
+   if (n_lits > 1) 
+      return False;
+
+   /* Fields not checked: val1, val2, val3 */
+
+   switch (u->opcode) {
+
+   /* Fields checked: lit32   size flags_r/w tag1   tag2   tag3    (rest) */
+   case LOADV:  return LIT0 && SZi && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case STOREV: return LITm && SZi && CC0 && TRL1 && TR2 &&  N3 && XOTHER;
+   case GETV:   return LIT0 && SZi && CC0 &&   A1 && TR2 &&  N3 && XOTHER;
+   case PUTV:   return LITm && SZi && CC0 && TRL1 &&  A2 &&  N3 && XOTHER;
+   case GETVF: 
+   case PUTVF:  return LIT0 && SZ0 && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case TESTV: 
+   case SETV:   return LIT0 && SZj && CC0 && TRA1 &&  N2 &&  N3 && XOTHER;
+   case TAG1:   return LIT0 && SZ0 && CC0 &&  TR1 &&  N2 && Ls3 && XOTHER;
+   case TAG2:   return LIT0 && SZ0 && CC0 &&  TR1 && TR2 && Ls3 && XOTHER;
+   default:
+      VG_(printf)("unhandled opcode: %u\n", u->opcode);
+      VG_(panic)("SK_(saneExtUInstr): unhandled opcode");
+   }
+   /* Keep the helper macros strictly local to this function.  Note:
+      SG_WD0 was previously missing from this list and leaked into the
+      rest of the translation unit. */
+#  undef LIT0
+#  undef LIT1
+#  undef LITm
+#  undef SZ0
+#  undef SZi
+#  undef SZj
+#  undef CC0
+#  undef TR1
+#  undef TR2
+#  undef A1
+#  undef A2
+#  undef L1
+#  undef Ls1
+#  undef Ls3
+#  undef TRL1
+#  undef TRA1
+#  undef N2
+#  undef N3
+#  undef COND0
+#  undef EXTRA4b0
+#  undef SG_WD0
+#  undef JMPKIND0
+#  undef CCALL0
+#  undef XOTHER
+}
+
+/* Return a static name string for a TagOp; used only for debug
+   printing by SK_(ppExtUInstr).  Panics on an unknown op. */
+static Char* nameOfTagOp ( TagOp h )
+{
+   switch (h) {
+      case Tag_PCast40:        return "PCast40";
+      case Tag_PCast20:        return "PCast20";
+      case Tag_PCast10:        return "PCast10";
+      case Tag_PCast01:        return "PCast01";
+      case Tag_PCast02:        return "PCast02";
+      case Tag_PCast04:        return "PCast04";
+      case Tag_PCast14:        return "PCast14";
+      case Tag_PCast12:        return "PCast12";
+      case Tag_PCast11:        return "PCast11";
+      case Tag_Left4:          return "Left4";
+      case Tag_Left2:          return "Left2";
+      case Tag_Left1:          return "Left1";
+      case Tag_SWiden14:       return "SWiden14";
+      case Tag_SWiden24:       return "SWiden24";
+      case Tag_SWiden12:       return "SWiden12";
+      case Tag_ZWiden14:       return "ZWiden14";
+      case Tag_ZWiden24:       return "ZWiden24";
+      case Tag_ZWiden12:       return "ZWiden12";
+      case Tag_UifU4:          return "UifU4";
+      case Tag_UifU2:          return "UifU2";
+      case Tag_UifU1:          return "UifU1";
+      case Tag_UifU0:          return "UifU0";
+      case Tag_DifD4:          return "DifD4";
+      case Tag_DifD2:          return "DifD2";
+      case Tag_DifD1:          return "DifD1";
+      case Tag_ImproveAND4_TQ: return "ImproveAND4_TQ";
+      case Tag_ImproveAND2_TQ: return "ImproveAND2_TQ";
+      case Tag_ImproveAND1_TQ: return "ImproveAND1_TQ";
+      case Tag_ImproveOR4_TQ:  return "ImproveOR4_TQ";
+      case Tag_ImproveOR2_TQ:  return "ImproveOR2_TQ";
+      case Tag_ImproveOR1_TQ:  return "ImproveOR1_TQ";
+      case Tag_DebugFn:        return "DebugFn";
+      default: VG_(panic)("vg_nameOfTagOp");
+   }
+}
+
+
+/* Name one of this skin's extended UCode opcodes for disassembly
+   output.  Panics (after printing the raw value) on anything else. */
+Char* SK_(nameExtUOpcode)(Opcode opc)
+{
+   switch (opc) {
+      case GETVF:   return "GETVF";
+      case PUTVF:   return "PUTVF";
+      case TAG1:    return "TAG1";
+      case TAG2:    return "TAG2";
+      case LOADV:   return "LOADV";
+      case STOREV:  return "STOREV";
+      case GETV:    return "GETV";
+      case PUTV:    return "PUTV";
+      case TESTV:   return "TESTV";
+      case SETV:    return "SETV";
+      default:      
+         VG_(printf)("unhandled opcode: %u\n", opc);
+         VG_(panic)("SK_(nameExtUOpcode): unhandled case");
+   }
+}
+
+/* ---------------------------------------------------------------------
+   Debugging stuff.
+   ------------------------------------------------------------------ */
+
+/* Pretty-print the operands of one extended UInstr (the opcode name
+   itself is printed by the core).  Panics on a non-extended opcode. */
+void SK_(ppExtUInstr)(UInstr* u)
+{
+   switch (u->opcode) {
+
+      /* TAG1 is an in-place unary op: operand 1 is both destination
+         and source, hence it is printed on both sides of the '='. */
+      case TAG1:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(" = %s ( ", nameOfTagOp( u->val3 ));
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(" )");
+         break;
+
+      /* TAG2: operand 2 is updated in place from operands 1 and 2. */
+      case TAG2:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 2, 4, False);
+         VG_(printf)(" = %s ( ", nameOfTagOp( u->val3 ));
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, 4, False);
+         VG_(printf)(" )");
+         break;
+
+      /* The address operand gets the memory-style rendering
+         (third argument True) on the side that is dereferenced. */
+      case STOREV: case LOADV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, u->opcode==LOADV);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, u->size, u->opcode==STOREV);
+         break;
+
+      case PUTVF: case GETVF:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, 0, False);
+         break;
+
+      /* For GETV/PUTV the TempReg side is printed at size 4, the
+         ArchReg side at the instruction's own size. */
+      case GETV: case PUTV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->opcode==PUTV ? 4 : u->size, False);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, u->opcode==GETV ? 4 : u->size, False);
+         break;
+
+      case TESTV: case SETV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, False);
+         break;
+
+      default:
+         VG_(printf)("unhandled opcode: %u\n", u->opcode);
+         VG_(panic)("SK_(ppExtUInstr): unhandled opcode");
+   }
+
+}
+
+/* Report register reads/writes for one extended UInstr, for the
+   core's register allocator / liveness analysis.  The RD/WR macros
+   presumably record operand numbers into 'arr' and advance 'n' --
+   their definitions (VG_UINSTR_READS_REG / _WRITES_REG) are in the
+   core, not visible here; confirm before relying on 'n'. */
+Int SK_(getExtRegUsage)(UInstr* u, Tag tag, RegUse* arr)
+{
+#  define RD(ono)    VG_UINSTR_READS_REG(ono)
+#  define WR(ono)    VG_UINSTR_WRITES_REG(ono)
+
+   Int n = 0;
+   switch (u->opcode) {        
+
+      // JJJ: I don't understand this comment... what about reg alloc?  --njn
+
+      /* These sizes are only ever consulted when the instrumentation
+         code is being added, so the following can return
+         manifestly-bogus sizes. */
+
+      case TAG1:    RD(1); WR(1);        break;
+      case TAG2:    RD(1); RD(2); WR(2); break;
+      case LOADV:   RD(1); WR(2);        break;
+      case STOREV:  RD(1); RD(2);        break;
+      case GETV:    WR(2);               break;
+      case PUTV:    RD(1);               break;
+      case TESTV:   RD(1);               break;
+      case SETV:    WR(1);               break;
+      case PUTVF:   RD(1);               break;
+      case GETVF:   WR(1);               break;
+
+      default: 
+         VG_(printf)("unhandled opcode: %u\n", u->opcode);
+         VG_(panic)("SK_(getExtRegUsage): unhandled opcode");
+   }
+   return n;
+
+#  undef RD
+#  undef WR
+
+/*------------------------------------------------------------*/
+/*--- New instrumentation machinery.                       ---*/
+/*------------------------------------------------------------*/
+
+/* Short local aliases for the core's UCode-building API, to keep the
+   instrumentation code below compact. */
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uInstr3   VG_(newUInstr3)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+#define newShadow VG_(getNewShadow)
+
+/* Map an operand size (4/2/1 bytes) to the matching ImproveOR_TQ
+   tag op; panics on any other size. */
+static
+TagOp get_Tag_ImproveOR_TQ ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_ImproveOR4_TQ;
+      case 2: return Tag_ImproveOR2_TQ;
+      case 1: return Tag_ImproveOR1_TQ;
+      default: VG_(panic)("get_Tag_ImproveOR_TQ");
+   }
+}
+
+
+/* Map an operand size (4/2/1 bytes) to the matching ImproveAND_TQ
+   tag op; panics on any other size. */
+static
+TagOp get_Tag_ImproveAND_TQ ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_ImproveAND4_TQ;
+      case 2: return Tag_ImproveAND2_TQ;
+      case 1: return Tag_ImproveAND1_TQ;
+      default: VG_(panic)("get_Tag_ImproveAND_TQ");
+   }
+}
+
+
+/* Map an operand size (4/2/1 bytes) to the matching Left tag op;
+   panics on any other size. */
+static
+TagOp get_Tag_Left ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_Left4;
+      case 2: return Tag_Left2;
+      case 1: return Tag_Left1;
+      default: VG_(panic)("get_Tag_Left");
+   }
+}
+
+
+/* Map an operand size (4/2/1/0 bytes; 0 = the flags-sized case) to
+   the matching UifU tag op; panics on any other size. */
+static
+TagOp get_Tag_UifU ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_UifU4;
+      case 2: return Tag_UifU2;
+      case 1: return Tag_UifU1;
+      case 0: return Tag_UifU0;
+      default: VG_(panic)("get_Tag_UifU");
+   }
+}
+
+
+/* Map an operand size (4/2/1 bytes) to the matching DifD tag op;
+   panics on any other size. */
+static
+TagOp get_Tag_DifD ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_DifD4;
+      case 2: return Tag_DifD2;
+      case 1: return Tag_DifD1;
+      default: VG_(panic)("get_Tag_DifD");
+   }
+}
+
+
+/* Map a (source size, dest size) pair to the matching pessimising-
+   cast tag op.  Only the pairings listed here are supported; any
+   other combination prints the sizes and panics. */
+static 
+TagOp get_Tag_PCast ( Int szs, Int szd )
+{
+   if (szs == 4 && szd == 0) return Tag_PCast40;
+   if (szs == 2 && szd == 0) return Tag_PCast20;
+   if (szs == 1 && szd == 0) return Tag_PCast10;
+   if (szs == 0 && szd == 1) return Tag_PCast01;
+   if (szs == 0 && szd == 2) return Tag_PCast02;
+   if (szs == 0 && szd == 4) return Tag_PCast04;
+   if (szs == 1 && szd == 4) return Tag_PCast14;
+   if (szs == 1 && szd == 2) return Tag_PCast12;
+   if (szs == 1 && szd == 1) return Tag_PCast11;
+   VG_(printf)("get_Tag_PCast(%d,%d)\n", szs, szd);
+   VG_(panic)("get_Tag_PCast");
+}
+
+
+/* Map (signedness, source size, dest size) to the matching widening
+   tag op.  Only 1->2, 1->4 and 2->4 are supported; anything else
+   prints the arguments and panics. */
+static 
+TagOp get_Tag_Widen ( Bool syned, Int szs, Int szd )
+{
+   if (szs == 1 && szd == 2 && syned)  return Tag_SWiden12;
+   if (szs == 1 && szd == 2 && !syned) return Tag_ZWiden12;
+
+   if (szs == 1 && szd == 4 && syned)  return Tag_SWiden14;
+   if (szs == 1 && szd == 4 && !syned) return Tag_ZWiden14;
+
+   if (szs == 2 && szd == 4 && syned)  return Tag_SWiden24;
+   if (szs == 2 && szd == 4 && !syned) return Tag_ZWiden24;
+
+   VG_(printf)("get_Tag_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
+   VG_(panic)("get_Tag_Widen");
+}
+
+/* Pessimally cast the spec'd shadow from one size to another.
+   Emits a TAG1 UInstr operating in place on 'tempreg'; a 0->0 cast
+   is a no-op and emits nothing. */
+static 
+void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
+{
+   if (szs == 0 && szd == 0)
+      return;
+   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
+                        NoValue, 0, 
+                        Lit16,   get_Tag_PCast(szs,szd));
+}
+
+
+/* Create a signed or unsigned widen of the spec'd shadow from one
+   size to another.  The only allowed size transitions are 1->2, 1->4
+   and 2->4 (enforced by get_Tag_Widen's panic).  Equal sizes are a
+   no-op; the TAG1 operates in place on 'tempreg'. */
+static 
+void create_Widen ( UCodeBlock* cb, Bool signed_widen,
+                    Int szs, Int szd, Int tempreg )
+{
+   if (szs == szd) return;
+   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
+                        NoValue, 0, 
+                        Lit16,   get_Tag_Widen(signed_widen,szs,szd));
+}
+
+
+/* Get the condition codes' definedness into a new shadow, pessimally
+   cast up from size 0 to the given size.  Returns the new shadow's
+   TempReg number. */
+static
+Int create_GETVF ( UCodeBlock* cb, Int sz )
+{
+   Int tt = newShadow(cb);
+   uInstr1(cb, GETVF, 0, TempReg, tt);
+   create_PCast(cb, 0, sz, tt);
+   return tt;
+}
+
+
+/* Save the condition codes' definedness from the spec'd shadow.
+   For sz != 0 the shadow is first copied to a fresh temp and
+   pessimally cast down to size 0, so the caller's shadow is never
+   modified. */
+static
+void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
+{
+   if (sz == 0) {
+      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
+   } else { 
+      Int tt = newShadow(cb);
+      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
+      create_PCast(cb, sz, 0, tt);
+      uInstr1(cb, PUTVF, 0, TempReg, tt);
+   }
+}
+
+
+/* Do Left on the spec'd shadow, in place (smears any undefined bit
+   leftwards through the value, per the Left tag ops). */
+static 
+void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
+{
+   uInstr3(cb, TAG1, 0, 
+               TempReg, tempreg,
+               NoValue, 0, 
+               Lit16, get_Tag_Left(sz));
+}
+
+
+/* Do UifU (undefined-if-either-undefined) on shadows ts and td,
+   putting the result in td; ts is unchanged. */
+static 
+void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
+{
+   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
+               Lit16, get_Tag_UifU(sz));
+}
+
+
+/* Do DifD (defined-if-either-defined) on shadows ts and td, putting
+   the result in td; ts is unchanged. */
+static 
+void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
+{
+   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
+               Lit16, get_Tag_DifD(sz));
+}
+
+
+/* Do HelpAND on value tval and tag tqqq, putting the result in
+   tqqq (improves the tag using the AND identity: a 0 value bit
+   forces a defined result bit regardless of the other operand). */
+static 
+void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
+{
+   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
+               Lit16, get_Tag_ImproveAND_TQ(sz));
+}
+
+
+/* Do HelpOR on value tval and tag tqqq, putting the result in
+   tqqq (improves the tag using the OR identity: a 1 value bit
+   forces a defined result bit regardless of the other operand). */
+static 
+void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
+{
+   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
+               Lit16, get_Tag_ImproveOR_TQ(sz));
+}
+
+
+/* Get the shadow for an operand described by (tag, val).  Emit code
+   to do this and return the identity of the shadow holding the
+   result.  The result tag is always copied into a new shadow, so it
+   can be modified without trashing the original.
+   TempReg: copy its shadow.  Literal: a literal is always fully
+   defined, so SETV the new shadow.  ArchReg: fetch the register's
+   shadow with GETV.  Any other tag panics. */
+static
+Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, 
+                                     Int sz, Int tag, Int val )
+{
+   Int sh;
+   sh = newShadow(cb);
+   if (tag == TempReg) {
+      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
+      return sh;
+   }
+   if (tag == Literal) {
+      uInstr1(cb, SETV, sz, TempReg, sh);
+      return sh;
+   }
+   if (tag == ArchReg) {
+      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
+      return sh;
+   }
+   VG_(panic)("getOperandShadow");
+}
+
+/* Create and return an instrumented version of cb_in.  Free cb_in
+   before returning. */
+static UCodeBlock* memcheck_instrument ( UCodeBlock* cb_in )
+{
+   UCodeBlock* cb;
+   Int         i, j;
+   UInstr*     u_in;
+   Int         qs, qd, qt, qtt;
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+      qs = qd = qt = qtt = INVALID_TEMPREG;
+      u_in = &cb_in->instrs[i];
+
+      switch (u_in->opcode) {
+
+         case NOP:
+            break;
+
+         case INCEIP:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Loads and stores.  Test the V bits for the address.  24
+            Mar 02: since the address is A-checked anyway, there's not
+            really much point in doing the V-check too, unless you
+            think that you might use addresses which are undefined but
+            still addressible.  Hence the optionalisation of the V
+            check.
+
+            The LOADV/STOREV does an addressibility check for the
+            address. */
+
+         case LOAD: 
+            if (SK_(clo_check_addrVs)) {
+               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            }
+            uInstr2(cb, LOADV, u_in->size, 
+                        TempReg, u_in->val1,
+                        TempReg, SHADOW(u_in->val2));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case STORE:
+            if (SK_(clo_check_addrVs)) {
+               uInstr1(cb, TESTV,  4, TempReg, SHADOW(u_in->val2));
+               uInstr1(cb, SETV,   4, TempReg, SHADOW(u_in->val2));
+            }
+            uInstr2(cb, STOREV, u_in->size,
+                        TempReg, SHADOW(u_in->val1), 
+                        TempReg, u_in->val2);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Moving stuff around.  Make the V bits follow accordingly,
+            but don't do anything else.  */
+
+         case GET:
+            uInstr2(cb, GETV, u_in->size,
+                        ArchReg, u_in->val1,
+                        TempReg, SHADOW(u_in->val2));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case PUT:
+            uInstr2(cb, PUTV, u_in->size, 
+                        TempReg, SHADOW(u_in->val1),
+                        ArchReg, u_in->val2);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case GETF:
+            /* This is not the smartest way to do it, but should work. */
+            qd = create_GETVF(cb, u_in->size);
+            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case PUTF:
+            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case MOV:
+            switch (u_in->tag1) {
+               case TempReg: 
+                  uInstr2(cb, MOV, 4,
+                              TempReg, SHADOW(u_in->val1),
+                              TempReg, SHADOW(u_in->val2));
+                  break;
+               case Literal: 
+                  uInstr1(cb, SETV, u_in->size, 
+                              TempReg, SHADOW(u_in->val2));
+                  break;
+               default: 
+                  VG_(panic)("memcheck_instrument: MOV");
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Special case of add, where one of the operands is a literal.
+            lea1(t) = t + some literal.
+            Therefore: lea1#(qa) = left(qa) 
+         */
+         case LEA1:
+            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
+            qs = SHADOW(u_in->val1);
+            qd = SHADOW(u_in->val2);
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
+            create_Left(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Another form of add.  
+            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
+                                and is 0,1,2 or 3.
+            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
+            Note, subtly, that the shift puts zeroes at the bottom of qt,
+            meaning Valid, since the corresponding shift of tt puts 
+            zeroes at the bottom of tb.
+         */
+         case LEA2: {
+            Int shift;
+            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
+            switch (u_in->extra4b) {
+               case 1: shift = 0; break;
+               case 2: shift = 1; break;
+               case 4: shift = 2; break;
+               case 8: shift = 3; break;
+               default: VG_(panic)( "memcheck_instrument(LEA2)" );
+            }
+            qs = SHADOW(u_in->val1);
+            qt = SHADOW(u_in->val2);
+            qd = SHADOW(u_in->val3);
+            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
+            if (shift > 0) {
+               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
+               uLiteral(cb, shift);
+            }
+            create_UifU(cb, 4, qs, qd);
+            create_Left(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
+         case INC: case DEC:
+            qd = SHADOW(u_in->val1);
+            create_Left(cb, u_in->size, qd);
+            if (u_in->flags_w != FlagsEmpty)
+               create_PUTVF(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* This is a HACK (approximation :-) */
+         /* rcl#/rcr#(qs,qd) 
+               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
+                 eflags# = q0
+                 qd =pcast-0-sz(q0)
+            Ie, cast everything down to a single bit, then back up.
+            This assumes that any bad bits infect the whole word and 
+            the eflags.
+         */
+         case RCL: case RCR:
+	    vg_assert(u_in->flags_r != FlagsEmpty);
+            /* The following assertion looks like it makes sense, but is
+               actually wrong.  Consider this:
+                  rcll    %eax
+                  imull   %eax, %eax
+               The rcll writes O and C but so does the imull, so the O and C 
+               write of the rcll is annulled by the prior improvement pass.
+               Noticed by Kevin Ryde <user42@zip.com.au>
+            */
+	    /* vg_assert(u_in->flags_w != FlagsEmpty); */
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            /* We can safely modify qs; cast it to 0-size. */
+            create_PCast(cb, u_in->size, 0, qs);
+            qd = SHADOW(u_in->val2);
+            create_PCast(cb, u_in->size, 0, qd);
+            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
+            create_UifU(cb, 0, qs, qd);
+            /* qs is now free; reuse it for the flag definedness. */
+            qs = create_GETVF(cb, 0);
+            create_UifU(cb, 0, qs, qd);
+            create_PUTVF(cb, 0, qd);
+            create_PCast(cb, 0, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* for OP in shl shr sar rol ror
+            (qs is shift count#, qd is value to be OP#d)
+            OP(ts,td)
+            OP#(qs,qd)
+               = pcast-1-sz(qs) `UifU` OP(ts,qd)
+            So we apply OP to the tag bits too, and then UifU with
+            the shift count# to take account of the possibility of it
+            being undefined.
+            
+            A bit subtle:
+               ROL/ROR rearrange the tag bits as per the value bits.
+               SHL/SHR shifts zeroes into the value, and corresponding 
+                  zeroes indicating Definedness into the tag.
+               SAR copies the top bit of the value downwards, and therefore
+                  SAR also copies the definedness of the top bit too.
+            So in all five cases, we just apply the same op to the tag 
+            bits as is applied to the value bits.  Neat!
+         */
+         case SHL:
+         case SHR: case SAR:
+         case ROL: case ROR: {
+            Int t_amount = INVALID_TEMPREG;
+            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
+            vg_assert(u_in->tag2 == TempReg);
+            qd = SHADOW(u_in->val2);
+
+            /* Make qs hold shift-count# and make
+               t_amount be a TempReg holding the shift count. */
+            if (u_in->tag1 == Literal) {
+               t_amount = newTemp(cb);
+               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
+               uLiteral(cb, u_in->lit32);
+               qs = SHADOW(t_amount);
+               uInstr1(cb, SETV, 1, TempReg, qs);
+            } else {
+               t_amount = u_in->val1;
+               qs = SHADOW(u_in->val1);
+            }
+
+            uInstr2(cb, u_in->opcode, 
+                        u_in->size, 
+                        TempReg, t_amount, 
+                        TempReg, qd);
+            qt = newShadow(cb);
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
+            create_PCast(cb, 1, u_in->size, qt);
+            create_UifU(cb, u_in->size, qt, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* One simple tag operation. */
+         case WIDEN:
+            vg_assert(u_in->tag1 == TempReg);
+            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, 
+                             SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* not#(x) = x (since bitwise independent) */
+         case NOT:
+            vg_assert(u_in->tag1 == TempReg);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* neg#(x) = left(x) (derivable from case for SUB) */
+         case NEG:
+            vg_assert(u_in->tag1 == TempReg);
+            create_Left(cb, u_in->size, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* bswap#(x) = bswap(x) */
+         case BSWAP:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->size == 4);
+            qd = SHADOW(u_in->val1);
+            uInstr1(cb, BSWAP, 4, TempReg, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
+         case CC2VAL:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->flags_r != FlagsEmpty);
+            qt = create_GETVF(cb, u_in->size);
+            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* cmov#(qs,qd) = cmov(qs,qd)
+            That is, do the cmov of tags using the same flags as for
+            the data (obviously).  However, first do a test on the 
+            validity of the flags.
+         */
+         case CMOV:
+            vg_assert(u_in->size == 4);
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->tag2 == TempReg);
+            vg_assert(u_in->flags_r != FlagsEmpty);
+            vg_assert(u_in->flags_w == FlagsEmpty);
+            qs = SHADOW(u_in->val1);
+            qd = SHADOW(u_in->val2);
+            qt = create_GETVF(cb, 0);
+            uInstr1(cb, TESTV, 0, TempReg, qt);
+            /* qt should never be referred to again.  Nevertheless
+               ... */
+            uInstr1(cb, SETV, 0, TempReg, qt);
+
+            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
+            LAST_UINSTR(cb).cond    = u_in->cond;
+            LAST_UINSTR(cb).flags_r = u_in->flags_r;
+
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* add#/sub#(qs,qd) 
+               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
+               = left(qs) `UifU` left(qd)
+               = left(qs `UifU` qd)
+            adc#/sbb#(qs,qd)
+               = left(qs `UifU` qd) `UifU` pcast(eflags#)
+            Second arg (dest) is TempReg.
+            First arg (src) is Literal or TempReg or ArchReg. 
+         */
+         case ADD: case SUB:
+         case ADC: case SBB:
+            qd = SHADOW(u_in->val2);
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            create_UifU(cb, u_in->size, qs, qd);
+            create_Left(cb, u_in->size, qd);
+            if (u_in->opcode == ADC || u_in->opcode == SBB) {
+               vg_assert(u_in->flags_r != FlagsEmpty);
+               qt = create_GETVF(cb, u_in->size);
+               create_UifU(cb, u_in->size, qt, qd);
+            }
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* xor#(qs,qd) = qs `UifU` qd */
+         case XOR:
+            qd = SHADOW(u_in->val2);
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            create_UifU(cb, u_in->size, qs, qd);
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* and#/or#(qs,qd) 
+               = (qs `UifU` qd) `DifD` improve(vs,qs) 
+                                `DifD` improve(vd,qd)
+            where improve is the relevant one of
+                Improve{AND,OR}_TQ
+            Use the following steps, with qt as a temp:
+               qt = improve(vd,qd)
+               qd = qs `UifU` qd
+               qd = qt `DifD` qd
+               qt = improve(vs,qs)
+               qd = qt `DifD` qd
+         */
+         case AND: case OR:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->tag2 == TempReg);
+            qd = SHADOW(u_in->val2);
+            qs = SHADOW(u_in->val1);
+            qt = newShadow(cb);
+
+            /* qt = improve(vd,qd) */
+            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
+            if (u_in->opcode == AND)
+               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
+            else
+               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
+            /* qd = qs `UifU` qd */
+            create_UifU(cb, u_in->size, qs, qd);
+            /* qd = qt `DifD` qd */
+            create_DifD(cb, u_in->size, qt, qd);
+            /* qt = improve(vs,qs) */
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
+            if (u_in->opcode == AND)
+               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
+            else
+               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
+            /* qd = qt `DifD` qd */
+               create_DifD(cb, u_in->size, qt, qd);
+            /* So, finally qd is the result tag. */
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Machinery to do with supporting CALLM.  Copy the start and
+            end markers only to make the result easier to read
+            (debug); they generate no code and have no effect. 
+         */
+         case CALLM_S: case CALLM_E:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Copy PUSH and POP verbatim.  Arg/result absval
+            calculations are done when the associated CALL is
+            processed.  CLEAR has no effect on absval calculations but
+            needs to be copied.  
+         */
+         case PUSH: case POP: case CLEAR:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* In short:
+               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
+            We have to decide on a size to do the computation at,
+            although the choice doesn't affect correctness.  We will
+            do a pcast to the final size anyway, so the only important
+            factor is to choose a size which minimises the total
+            number of casts needed.  Valgrind: just use size 0,
+            regardless.  It may not be very good for performance
+            but does simplify matters, mainly by reducing the number
+            of different pessimising casts which have to be implemented.
+         */
+         case CALLM: {
+            UInstr* uu;
+            Bool res_used;
+
+            /* Now generate the code.  Get the final result absval
+               into qt. */
+            qt  = newShadow(cb);
+            qtt = newShadow(cb);
+            uInstr1(cb, SETV, 0, TempReg, qt);
+            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
+               uu = & cb_in->instrs[j];
+               if (uu->opcode != PUSH) continue;
+               /* cast via a temporary */
+               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
+                                   TempReg, qtt);
+               create_PCast(cb, uu->size, 0, qtt);
+               create_UifU(cb, 0, qtt, qt);
+            }
+            /* Remembering also that flags read count as inputs. */
+            if (u_in->flags_r != FlagsEmpty) {
+               qtt = create_GETVF(cb, 0);
+               create_UifU(cb, 0, qtt, qt);
+            }
+
+            /* qt now holds the result tag.  If any results from the
+               call are used, either by fetching with POP or
+               implicitly by writing the flags, we copy the result
+               absval to the relevant location.  If not used, the call
+               must have been for its side effects, so we test qt here
+               and now.  Note that this assumes that all values
+               removed by POP continue to be live.  So dead args
+               *must* be removed with CLEAR, not by POPping them into
+               a dummy tempreg. 
+            */
+            res_used = False;
+            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
+               uu = & cb_in->instrs[j];
+               if (uu->opcode != POP) continue;
+               /* Cast via a temp. */
+               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
+               create_PCast(cb, 0, uu->size, qtt);
+               uInstr2(cb, MOV, 4, TempReg, qtt, 
+                                   TempReg, SHADOW(uu->val1));
+               res_used = True;
+            }
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, 0, qt);
+               res_used = True;
+            }
+            if (!res_used) {
+               uInstr1(cb, TESTV, 0, TempReg, qt);
+               /* qt should never be referred to again.  Nevertheless
+                  ... */
+               uInstr1(cb, SETV, 0, TempReg, qt);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+         /* Whew ... */
+
+         case JMP:
+            if (u_in->tag1 == TempReg) {
+               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            } else {
+               vg_assert(u_in->tag1 == Literal);
+            }
+            if (u_in->cond != CondAlways) {
+               vg_assert(u_in->flags_r != FlagsEmpty);
+               qt = create_GETVF(cb, 0);
+               uInstr1(cb, TESTV, 0, TempReg, qt);
+               /* qt should never be referred to again.  Nevertheless
+                  ... */
+               uInstr1(cb, SETV, 0, TempReg, qt);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case JIFZ:
+            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Emit a check on the address used.  The value loaded into the 
+            FPU is checked by the call to fpu_{read/write}_check().  */
+         case FPU_R: case FPU_W: {
+            Int t_size = INVALID_TEMPREG;
+
+            vg_assert(u_in->tag2 == TempReg);
+            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
+            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
+
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val2, TempReg, t_size);
+            uCCall(cb, 
+                   u_in->opcode==FPU_R ? (Addr) & SK_(fpu_read_check) 
+                                       : (Addr) & SK_(fpu_write_check),
+                   2, 2, False);
+
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* For FPU insns not referencing memory, just copy thru. */
+         case FPU: 
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         default:
+            VG_(ppUInstr)(0, u_in);
+            VG_(panic)( "memcheck_instrument: unhandled case");
+
+      } /* end of switch (u_in->opcode) */
+
+   } /* end of for loop */
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+/*------------------------------------------------------------*/
+/*--- Clean up mem check instrumentation.                  ---*/
+/*------------------------------------------------------------*/
+
+/* When True, print the cleanup passes' transformations; set during
+   command-line processing elsewhere. */
+Bool VG_(clo_memcheck_codegen) = False;
+
+/* Local shorthand used throughout the cleanup passes below. */
+#define dis    VG_(print_codegen)
+
+
+/* A TempReg number denotes a shadow (tag/V-bit) temp iff it is odd;
+   even numbers are ordinary value temps. */
+#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
+/* Sentinel for def[]: shadow temp possibly undefined. */
+#define VGC_UNDEF ((UChar)100)
+/* Sentinel for def[]: ordinary value temp (not a shadow). */
+#define VGC_VALUE ((UChar)101)
+
+/* Annul an instruction without logging. */
+#define NOP_no_msg(uu)                                            \
+   do { VG_(newNOP)(uu); } while (False)
+
+/* Annul a TAG1 op, logging if `dis'.  NB: the log line reads `i'
+   and `u' from the call site, not the macro argument `uu'. */
+#define NOP_tag1_op(uu)                                           \
+   do { VG_(newNOP)(uu);                                          \
+        if (dis)                                                  \
+           VG_(printf)("   at %2d: delete %s due to defd arg\n",  \
+                       i, nameOfTagOp(u->val3));                  \
+   } while (False)
+
+/* Convert a TAG1 op in place into SETV of size `newsz', logging if
+   `dis'.  As above, the log line reads `i' and `u' from the call
+   site. */
+#define SETV_tag1_op(uu,newsz)                                    \
+   do { uu->opcode = SETV;                                        \
+        uu->size = newsz;                                         \
+        uu->tag2 = uu->tag3 = NoValue;                            \
+        if (dis)                                                  \
+           VG_(printf)("   at %2d: convert %s to SETV%d "         \
+                       "due to defd arg\n",                       \
+                       i, nameOfTagOp(u->val3), newsz);           \
+   } while (False)
+
+
+
+/* Run backwards and delete SETVs on shadow temps for which the next
+   action is a write.  Needs an env saying whether or not the next
+   action is a write.  The supplied UCodeBlock is destructively
+   modified.
+*/
+static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
+{
+   Int     i, j, k;
+   Int     n_temps = cb->nextTemp;
+   Bool*   next_is_write;
+   UInstr* u;
+   RegUse  tempUse[3];
+
+   if (n_temps == 0) return;
+
+   /* next_is_write[t]: looking forwards from the current scan point,
+      is the next action on temp t a write?  If so, a value computed
+      into t here is dead.
+      NOTE(review): this buffer is VG_(malloc)'d but never VG_(free)'d
+      in this function -- presumably reclaimed along with the rest of
+      the translation's storage; confirm. */
+   next_is_write = VG_(malloc)(n_temps * sizeof(Bool));
+
+   /* At the end of the block every temp counts as about-to-be-written
+      (i.e. dead), so trailing redundant writes are deleted too. */
+   for (i = 0; i < n_temps; i++) next_is_write[i] = True;
+
+   /* Scan the block backwards, deleting dead writes and maintaining
+      next_is_write[] via VG_(getRegUsage) for unhandled opcodes. */
+   for (i = cb->used-1; i >= 0; i--) {
+      u = &cb->instrs[i];
+
+      /* If we're not checking address V bits, there will be a lot of
+         GETVs, TAG1s and TAG2s calculating values which are never
+         used.  These first three cases get rid of them. */
+
+      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) 
+                            && next_is_write[u->val2]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete GETV\n", i);
+      } else
+
+      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) 
+                            && next_is_write[u->val1]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete TAG1\n", i);
+      } else
+
+      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) 
+                            && next_is_write[u->val2]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete TAG2\n", i);
+      } else
+
+      /* We do the rest of these regardless of whether or not
+         addresses are V-checked. */
+
+      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) 
+                           && next_is_write[u->val2]) {
+         /* This MOV is pointless because the target is dead at this
+            point.  Delete it. */
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete MOV\n", i);
+      } else
+
+      if (u->opcode == SETV) {
+         if (u->tag1 == TempReg) {
+            vg_assert(VGC_IS_SHADOW(u->val1));
+            if (next_is_write[u->val1]) {
+               /* This write is pointless, so annul it. */
+               VG_(newNOP)(u);
+               if (dis) 
+                  VG_(printf)("   at %2d: delete SETV\n", i);
+            } else {
+               /* This write has a purpose; don't annul it, but do
+                  notice that we did it. */
+               next_is_write[u->val1] = True;
+            }
+              
+         }
+
+      } else {
+         /* Find out what this insn does to the temps. */
+         k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
+         vg_assert(k <= 3);
+         /* Walk uses in reverse so that, for a temp both read and
+            written by this insn, the earlier (read) use wins. */
+         for (j = k-1; j >= 0; j--) {
+            next_is_write[ tempUse[j].num ]
+                         = tempUse[j].isWrite;
+         }
+      }
+   }
+}
+
+
+/* Run forwards, propagating and using the is-completely-defined
+   property.  This removes a lot of redundant tag-munging code.
+   Unfortunately it requires intimate knowledge of how each uinstr and
+   tagop modifies its arguments.  This duplicates knowledge of uinstr
+   tempreg uses embodied in VG_(getRegUsage)(), which is unfortunate. 
+   The supplied UCodeBlock* is modified in-place.
+
+   For each value temp, def[] should hold VGC_VALUE.
+
+   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
+   definitely known to be fully defined at that size.  In all other
+   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
+   undefined.  In cases of doubt, VGC_UNDEF is always safe.
+*/
+static void vg_propagate_definedness ( UCodeBlock* cb )
+{
+   Int     i, j, k, t;
+   Int     n_temps = cb->nextTemp;
+   UChar*  def;
+   UInstr* u;
+   RegUse  tempUse[3];
+
+   if (n_temps == 0) return;
+
+   /* NOTE(review): def[] is VG_(malloc)'d but never VG_(free)'d in
+      this function -- presumably reclaimed along with the rest of the
+      translation's storage; confirm. */
+   def = VG_(malloc)(n_temps * sizeof(UChar));
+
+   for (i = 0; i < n_temps; i++) 
+      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
+
+   /* Run forwards, detecting and using the all-defined property. */
+
+   for (i = 0; i < cb->used; i++) {
+      u = &cb->instrs[i];
+      switch (u->opcode) {
+
+      /* Tag-handling uinstrs. */
+
+         /* Deal with these quickly. */
+         case NOP:
+         case INCEIP:
+            break;
+
+         /* Make a tag defined. */
+         case SETV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            def[u->val1] = u->size;
+            break;
+
+         /* Check definedness of a tag.  def[] <= 4 means the shadow
+            is known fully defined at that size, so the run-time test
+            is redundant and can be deleted. */
+         case TESTV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) { 
+               vg_assert(def[u->val1] == u->size); 
+               NOP_no_msg(u);
+               if (dis) 
+                  VG_(printf)("   at %2d: delete TESTV on defd arg\n", i);
+            }
+            break;
+
+         /* Applies to both values and tags.  Propagate Definedness
+            property through copies.  Note that this isn't optional;
+            we *have* to do this to keep def[] correct. */
+         case MOV:
+            /* NOTE(review): a MOV with a Literal source leaves
+               def[u->val2] unchanged; this is safe only if literals
+               are never MOVed into shadow temps -- confirm. */
+            vg_assert(u->tag2 == TempReg);
+            if (u->tag1 == TempReg) {
+               if (VGC_IS_SHADOW(u->val1)) {
+                  vg_assert(VGC_IS_SHADOW(u->val2));
+                  def[u->val2] = def[u->val1];
+               }
+            }
+            break;
+
+         /* If the tag being PUT is known fully defined, replace the
+            TempReg operand with the equivalent literal V-bit word
+            (0 bits = defined, presumably; unused upper bytes stay
+            marked undefined). */
+         case PUTV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) {
+               vg_assert(def[u->val1] == u->size);
+               u->tag1 = Literal;
+               u->val1 = 0;
+               switch (u->size) {
+                  case 4: u->lit32 = 0x00000000; break;
+                  case 2: u->lit32 = 0xFFFF0000; break;
+                  case 1: u->lit32 = 0xFFFFFF00; break;
+                  default: VG_(panic)("vg_cleanup(PUTV)");
+               }
+               if (dis) 
+                  VG_(printf)(
+                     "   at %2d: propagate definedness into PUTV\n", i);
+            }
+            break;
+
+         /* Same literal-substitution trick as PUTV, for stores. */
+         case STOREV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) {
+               vg_assert(def[u->val1] == u->size);
+               u->tag1 = Literal;
+               u->val1 = 0;
+               switch (u->size) {
+                  case 4: u->lit32 = 0x00000000; break;
+                  case 2: u->lit32 = 0xFFFF0000; break;
+                  case 1: u->lit32 = 0xFFFFFF00; break;
+                  default: VG_(panic)("vg_cleanup(STOREV)");
+               }
+               /* NOTE(review): the message below says "STandV" --
+                  presumably meant STOREV. */
+               if (dis) 
+                  VG_(printf)(
+                     "   at %2d: propagate definedness into STandV\n", i);
+            }
+            break;
+
+         /* Nothing interesting we can do with this, I think. */
+         case PUTVF:
+            break;
+
+         /* Tag handling operations. */
+         case TAG2:
+            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
+            vg_assert(u->tag3 == Lit16);
+            /* Ultra-paranoid "type" checking: the Improve*_TQ ops
+               take a value temp as arg1; all other TAG2 ops take a
+               shadow. */
+            switch (u->val3) {
+               case Tag_ImproveAND4_TQ: case Tag_ImproveAND2_TQ:
+               case Tag_ImproveAND1_TQ: case Tag_ImproveOR4_TQ:
+               case Tag_ImproveOR2_TQ: case Tag_ImproveOR1_TQ:
+                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
+                  break;
+               default:
+                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+                  break;
+            }
+            switch (u->val3) {
+               Int sz;
+               case Tag_UifU4: 
+                  sz = 4; goto do_UifU;
+               case Tag_UifU2: 
+                  sz = 2; goto do_UifU;
+               case Tag_UifU1:
+                  sz = 1; goto do_UifU;
+               case Tag_UifU0:
+                  sz = 0; goto do_UifU;
+               do_UifU:
+                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
+                  if (def[u->val1] <= 4) {
+                     /* UifU.  The first arg is defined, so result is
+                        simply second arg.  Delete this operation. */
+                     vg_assert(def[u->val1] == sz);
+                     NOP_no_msg(u);
+                     if (dis) 
+                        VG_(printf)(
+                           "   at %2d: delete UifU%d due to defd arg1\n", 
+                           i, sz);
+                  }
+                  else 
+                  if (def[u->val2] <= 4) {
+                     /* UifU.  The second arg is defined, so result is
+                        simply first arg.  Copy to second. */
+                     vg_assert(def[u->val2] == sz);
+                     u->opcode = MOV; 
+                     u->size = 4;
+                     u->tag3 = NoValue;
+                     def[u->val2] = def[u->val1];
+                     if (dis) 
+                        VG_(printf)(
+                           "   at %2d: change UifU%d to MOV due to defd"
+                           " arg2\n", 
+                           i, sz);
+                  }
+                  break;
+               case Tag_ImproveAND4_TQ:
+                  sz = 4; goto do_ImproveAND;
+               case Tag_ImproveAND1_TQ:
+                  sz = 1; goto do_ImproveAND;
+               do_ImproveAND:
+                  /* Implements Q = T OR Q.  So if Q is entirely defined,
+                     ie all 0s, we get MOV T, Q. */
+		  if (def[u->val2] <= 4) {
+                     vg_assert(def[u->val2] == sz);
+                     u->size = 4; /* Regardless of sz */
+                     u->opcode = MOV;
+                     u->tag3 = NoValue;
+                     def[u->val2] = VGC_UNDEF;
+                     if (dis) 
+                        VG_(printf)(
+                            "   at %2d: change ImproveAND%d_TQ to MOV due "
+                            "to defd arg2\n", 
+                            i, sz);
+                  }
+                  break;
+               default: 
+                  goto unhandled;
+            }
+            break;
+
+         case TAG1:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] > 4) break;
+            /* We now know that the arg to the op is entirely defined.
+               If the op changes the size of the arg, we must replace
+               it with a SETV at the new size.  If it doesn't change
+               the size, we can delete it completely. */
+            switch (u->val3) {
+               /* Maintain the same size ... */
+               case Tag_Left4: 
+                  vg_assert(def[u->val1] == 4);
+                  NOP_tag1_op(u);
+                  break;
+               case Tag_PCast11: 
+                  vg_assert(def[u->val1] == 1);
+                  NOP_tag1_op(u);
+                  break;
+               /* Change size ... */
+               case Tag_PCast40: 
+                  vg_assert(def[u->val1] == 4);
+                  SETV_tag1_op(u,0);
+                  def[u->val1] = 0;
+                  break;
+               case Tag_PCast14: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,4);
+                  def[u->val1] = 4;
+                  break;
+               case Tag_PCast12: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,2);
+                  def[u->val1] = 2;
+                  break;
+               case Tag_PCast10: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,0);
+                  def[u->val1] = 0;
+                  break;
+               case Tag_PCast02: 
+                  vg_assert(def[u->val1] == 0);
+                  SETV_tag1_op(u,2);
+                  def[u->val1] = 2;
+                  break;
+               default: 
+                  goto unhandled;
+            }
+            if (dis) 
+               VG_(printf)(
+                  "   at %2d: delete TAG1 %s due to defd arg\n",
+                  i, nameOfTagOp(u->val3));
+            break;
+
+         default:
+         unhandled:
+            /* We don't know how to handle this uinstr.  Be safe, and 
+               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
+            k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
+            vg_assert(k <= 3);
+            for (j = 0; j < k; j++) {
+               t = tempUse[j].num;
+               vg_assert(t >= 0 && t < n_temps);
+               if (!tempUse[j].isWrite) {
+                  /* t is read; ignore it.  (The print below is a
+                     debug aid, disabled via the 0&&.) */
+                  if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
+                     VG_(printf)("ignoring def %d at %s %s\n", 
+                                 def[t], 
+                                 VG_(nameUOpcode)(True, u->opcode),
+                                 (u->opcode == TAG1 || u->opcode == TAG2)
+                                    ? nameOfTagOp(u->val3) 
+                                    : (Char*)"");
+               } else {
+                  /* t is written; better nullify it. */
+                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
+               }
+            }
+      }
+   }
+}
+
+
+/* Top level post-MemCheck-instrumentation cleanup function.
+   Propagation runs first; it converts tag ops into SETVs/NOPs, and
+   the second pass can then delete any of those SETVs whose target
+   turns out to be dead. */
+static void vg_cleanup ( UCodeBlock* cb )
+{
+   vg_propagate_definedness ( cb );
+   vg_delete_redundant_SETVs ( cb );
+}
+
+
+/* Caller will print out final instrumented code if necessary;  we
+   print out intermediate instrumented code here if necessary.
+   The second parameter is ignored (hence the name `not_used').
+   Instruments cb, then optionally runs the cleanup passes when
+   SK_(clo_cleanup) is set. */
+UCodeBlock* SK_(instrument) ( UCodeBlock* cb, Addr not_used )
+{
+   cb = memcheck_instrument ( cb );
+   if (SK_(clo_cleanup)) {
+      if (dis) {
+         VG_(ppUCodeBlock) ( cb, "Unimproved instrumented UCode:" );
+         VG_(printf)("Instrumentation improvements:\n");
+      }
+      vg_cleanup(cb);
+      if (dis) VG_(printf)("\n");
+   }
+   return cb;
+}
+
+#undef dis
+
+/*--------------------------------------------------------------------*/
+/*--- end                                  vg_memcheck_translate.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/memcheck/memcheck.h b/memcheck/memcheck.h
new file mode 100644
index 0000000..b126ffb
--- /dev/null
+++ b/memcheck/memcheck.h
@@ -0,0 +1,197 @@
+
+/*
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (vg_memcheck.h) only.  The entire rest of Valgrind is licensed
+   under the terms of the GNU General Public License, version 2.  See
+   the COPYING file in the source distribution for details.
+
+   ----------------------------------------------------------------
+
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward.  All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. The origin of this software must not be misrepresented; you must 
+      not claim that you wrote the original software.  If you use this 
+      software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
+
+   3. Altered source versions must be plainly marked as such, and must
+      not be misrepresented as being the original software.
+
+   4. The name of the author may not be used to endorse or promote 
+      products derived from this software without specific prior written 
+      permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   ----------------------------------------------------------------
+
+   Notice that the above BSD-style license applies to this one file
+   (vg_memcheck.h) only.  The entire rest of Valgrind is licensed under
+   the terms of the GNU General Public License, version 2.  See the
+   COPYING file in the source distribution for details.
+
+   ---------------------------------------------------------------- 
+*/
+
+
+#ifndef __VG_MEMCHECK_H
+#define __VG_MEMCHECK_H
+
+
+/* This file is for inclusion into client (your!) code.
+
+   You can use these macros to manipulate and query memory permissions
+   inside your own programs.
+
+   See comment near the top of valgrind.h on how to use them.
+*/
+
+#include "valgrind.h"
+
+typedef
+   enum { 
+      VG_USERREQ__MAKE_NOACCESS = VG_USERREQ__FINAL_DUMMY_CLIENT_REQUEST + 1, 
+      VG_USERREQ__MAKE_WRITABLE,
+      VG_USERREQ__MAKE_READABLE,
+      VG_USERREQ__DISCARD,
+      VG_USERREQ__CHECK_WRITABLE,
+      VG_USERREQ__CHECK_READABLE,
+      VG_USERREQ__MAKE_NOACCESS_STACK,
+      VG_USERREQ__DO_LEAK_CHECK, /* untested */
+   } Vg_MemCheckClientRequest;
+
+
+
+/* Client-code macros to manipulate the state of memory. */
+
+/* Mark memory at _qzz_addr as unaddressable and undefined for
+   _qzz_len bytes.  Returns an int handle pertaining to the block
+   descriptions Valgrind will use in subsequent error messages. */
+#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_NOACCESS,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   }) 
+      
+/* Similarly, mark memory at _qzz_addr as addressable but undefined
+   for _qzz_len bytes. */
+#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_WRITABLE,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   })
+
+/* Similarly, mark memory at _qzz_addr as addressable and defined
+   for _qzz_len bytes. */
+#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_READABLE,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   })
+
+/* Discard a block-description-handle obtained from the above three
+   macros.  After this, Valgrind will no longer be able to relate
+   addressing errors to the user-defined block associated with the
+   handle.  The permissions settings associated with the handle remain
+   in place.  Returns 1 for an invalid handle, 0 for a valid
+   handle. */
+#define VALGRIND_DISCARD(_qzz_blkindex)                          \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__DISCARD,                 \
+                            0, _qzz_blkindex, 0, 0);             \
+    _qzz_res;                                                    \
+   })
+
+
+/* Client-code macros to check the state of memory. */
+
+/* Check that memory at _qzz_addr is addressable for _qzz_len bytes.
+   If suitable addressability is not established, Valgrind prints an
+   error message and returns the address of the first offending byte.
+   Otherwise it returns zero. */
+#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len)                \
+   ({unsigned int _qzz_res;                                        \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__CHECK_WRITABLE,            \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+    _qzz_res;                                                      \
+   })
+
+/* Check that memory at _qzz_addr is addressable and defined for
+   _qzz_len bytes.  If suitable addressability and definedness are not
+   established, Valgrind prints an error message and returns the
+   address of the first offending byte.  Otherwise it returns zero. */
+#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len)                \
+   ({unsigned int _qzz_res;                                        \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__CHECK_READABLE,            \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+    _qzz_res;                                                      \
+   })
+
+/* Use this macro to force the definedness and addressability of a
+   value to be checked.  If suitable addressability and definedness
+   are not established, Valgrind prints an error message and returns
+   the address of the first offending byte.  Otherwise it returns
+   zero. */
+#define VALGRIND_CHECK_DEFINED(__lvalue)                           \
+   (void)                                                          \
+   VALGRIND_CHECK_READABLE(                                        \
+      (volatile unsigned char *)&(__lvalue),                       \
+                      (unsigned int)(sizeof (__lvalue)))
+
+/* Mark memory, intended to be on the client's stack, at _qzz_addr as
+   unaddressable and undefined for _qzz_len bytes.  Does not return a
+   value.  The record associated with this setting will be
+   automatically removed by Valgrind when the containing routine
+   exits. */
+#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len)           \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__MAKE_NOACCESS_STACK,       \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
+
+/* Do a memory leak check mid-execution.
+   Currently implemented but untested.
+*/
+#define VALGRIND_DO_LEAK_CHECK                                     \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DO_LEAK_CHECK,             \
+                            0, 0, 0, 0);                           \
+   }
+
+
+#endif
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
new file mode 100644
index 0000000..a36047c
--- /dev/null
+++ b/memcheck/tests/Makefile.am
@@ -0,0 +1,61 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## Need more tests:
+## - lots more mmap/munmap/mremap/mprotect ones
+
+## Notes:
+##   - HEAD and ERASER stderr.exp are different for signal2 due to lazy vs.
+##     strict EIP updating
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	badaddrvalue badfree badjump badloop buflen_check \
+	doublefree errs1 exitprog fprw fwrite inits inline \
+	malloc1 malloc2 manuel1 manuel2 manuel3 \
+	memalign_test memcmptest mmaptest nanoleak pushfpopf \
+	realloc1 realloc2 sigaltstack signal2 supp1 supp2 suppfree \
+	trivialleak tronical weirdioctl	\
+	mismatches new_override
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# C ones
+badaddrvalue_SOURCES 	= badaddrvalue.c
+badfree_SOURCES 	= badfree.c
+badjump_SOURCES 	= badjump.c
+badloop_SOURCES 	= badloop.c
+buflen_check_SOURCES	= buflen_check.c
+doublefree_SOURCES 	= doublefree.c
+errs1_SOURCES 		= errs1.c
+exitprog_SOURCES 	= exitprog.c
+fprw_SOURCES 		= fprw.c
+fwrite_SOURCES 		= fwrite.c
+inits_SOURCES		= inits.c
+inline_SOURCES 	        = inline.c
+malloc1_SOURCES 	= malloc1.c
+malloc2_SOURCES 	= malloc2.c
+manuel1_SOURCES 	= manuel1.c
+manuel2_SOURCES 	= manuel2.c
+manuel3_SOURCES 	= manuel3.c
+mmaptest_SOURCES 	= mmaptest.c
+memalign_test_SOURCES 	= memalign_test.c
+memcmptest_SOURCES 	= memcmptest.c
+nanoleak_SOURCES 	= nanoleak.c
+pushfpopf_SOURCES 	= pushfpopf_c.c pushfpopf_s.s
+realloc1_SOURCES 	= realloc1.c
+realloc2_SOURCES 	= realloc2.c
+signal2_SOURCES 	= signal2.c
+supp1_SOURCES 		= supp.c
+supp2_SOURCES 		= supp.c
+suppfree_SOURCES 	= suppfree.c
+sigaltstack_SOURCES 	= sigaltstack.c
+trivialleak_SOURCES 	= trivialleak.c
+tronical_SOURCES 	= tronical.S
+weirdioctl_SOURCES 	= weirdioctl.c
+
+# C++ ones
+mismatches_SOURCES	= mismatches.cpp
+new_override_SOURCES 	= new_override.cpp
+
diff --git a/tests/badaddrvalue.c b/memcheck/tests/badaddrvalue.c
similarity index 100%
copy from tests/badaddrvalue.c
copy to memcheck/tests/badaddrvalue.c
diff --git a/memcheck/tests/badaddrvalue.stderr.exp b/memcheck/tests/badaddrvalue.stderr.exp
new file mode 100644
index 0000000..8888c85
--- /dev/null
+++ b/memcheck/tests/badaddrvalue.stderr.exp
@@ -0,0 +1,26 @@
+
+Invalid write of size 1
+   at 0x........: main (badaddrvalue.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+Invalid read of size 1
+   at 0x........: main (badaddrvalue.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/badaddrvalue.stderr.exp.hd b/memcheck/tests/badaddrvalue.stderr.exp.hd
new file mode 100644
index 0000000..8888c85
--- /dev/null
+++ b/memcheck/tests/badaddrvalue.stderr.exp.hd
@@ -0,0 +1,26 @@
+
+Invalid write of size 1
+   at 0x........: main (badaddrvalue.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+Invalid read of size 1
+   at 0x........: main (badaddrvalue.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/badaddrvalue.stdout.exp b/memcheck/tests/badaddrvalue.stdout.exp
new file mode 100644
index 0000000..98d9bcb
--- /dev/null
+++ b/memcheck/tests/badaddrvalue.stdout.exp
@@ -0,0 +1 @@
+17
diff --git a/memcheck/tests/badaddrvalue.vgtest b/memcheck/tests/badaddrvalue.vgtest
new file mode 100644
index 0000000..91187e8
--- /dev/null
+++ b/memcheck/tests/badaddrvalue.vgtest
@@ -0,0 +1 @@
+prog: badaddrvalue
diff --git a/memcheck/tests/badfree-2trace.stderr.exp b/memcheck/tests/badfree-2trace.stderr.exp
new file mode 100644
index 0000000..741fd25
--- /dev/null
+++ b/memcheck/tests/badfree-2trace.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:12)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:15)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/badfree-2trace.vgtest b/memcheck/tests/badfree-2trace.vgtest
new file mode 100644
index 0000000..8a60456
--- /dev/null
+++ b/memcheck/tests/badfree-2trace.vgtest
@@ -0,0 +1,2 @@
+vgopts: --num-callers=2
+prog:   badfree
diff --git a/memcheck/tests/badfree.c b/memcheck/tests/badfree.c
new file mode 100644
index 0000000..3a22567
--- /dev/null
+++ b/memcheck/tests/badfree.c
@@ -0,0 +1,18 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main ( void )
+{
+   void* p = (void*)0x87654321;
+   int q[] = { 1, 2, 3 };
+   
+   /* Free a pointer to Never-Never Land */
+   free(p);
+
+   /* Free a pointer to a stack block */
+   free(q);
+
+   return 0;
+}
diff --git a/memcheck/tests/badfree.stderr.exp b/memcheck/tests/badfree.stderr.exp
new file mode 100644
index 0000000..37c9b3e
--- /dev/null
+++ b/memcheck/tests/badfree.stderr.exp
@@ -0,0 +1,20 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/badfree)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/badfree)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/badfree.vgtest b/memcheck/tests/badfree.vgtest
new file mode 100644
index 0000000..455863a
--- /dev/null
+++ b/memcheck/tests/badfree.vgtest
@@ -0,0 +1 @@
+prog: badfree
diff --git a/tests/badjump.c b/memcheck/tests/badjump.c
similarity index 100%
copy from tests/badjump.c
copy to memcheck/tests/badjump.c
diff --git a/memcheck/tests/badjump.stderr.exp b/memcheck/tests/badjump.stderr.exp
new file mode 100644
index 0000000..1be7f70
--- /dev/null
+++ b/memcheck/tests/badjump.stderr.exp
@@ -0,0 +1,6 @@
+
+Jump to the invalid address stated on the next line
+   at 0x........: ???
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+   Address 0x........ is not stack'd, malloc'd or free'd
diff --git a/memcheck/tests/badjump.vgtest b/memcheck/tests/badjump.vgtest
new file mode 100644
index 0000000..1e82b86
--- /dev/null
+++ b/memcheck/tests/badjump.vgtest
@@ -0,0 +1 @@
+prog: badjump
diff --git a/tests/badloop.c b/memcheck/tests/badloop.c
similarity index 100%
copy from tests/badloop.c
copy to memcheck/tests/badloop.c
diff --git a/memcheck/tests/badloop.stderr.exp b/memcheck/tests/badloop.stderr.exp
new file mode 100644
index 0000000..ebfa1c2
--- /dev/null
+++ b/memcheck/tests/badloop.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (badloop.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badloop)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/badloop.vgtest b/memcheck/tests/badloop.vgtest
new file mode 100644
index 0000000..abd0f39
--- /dev/null
+++ b/memcheck/tests/badloop.vgtest
@@ -0,0 +1 @@
+prog: badloop
diff --git a/memcheck/tests/buflen_check.c b/memcheck/tests/buflen_check.c
new file mode 100644
index 0000000..25f1714
--- /dev/null
+++ b/memcheck/tests/buflen_check.c
@@ -0,0 +1,29 @@
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(void)
+{
+   struct sockaddr name;
+   int res1, res2;
+   int len = 10;
+
+   res1 = socket(PF_UNIX, SOCK_STREAM, 0);
+   if (res1 == 0) {
+      fprintf(stderr, "socket() failed\n");
+      exit(1);
+   }
+
+   /* Valgrind 1.0.X doesn't report the second error */
+   res1 = getsockname(-1, NULL,  &len);    /* NULL is bogus */
+   res2 = getsockname(-1, &name, NULL);    /* NULL is bogus */
+   if (res1 == -1) {
+      fprintf(stderr, "getsockname(1) failed\n");
+   }
+   if (res2 == -1) {
+      fprintf(stderr, "getsockname(2) failed\n");
+   }
+   
+   return 0;
+}
+
diff --git a/memcheck/tests/buflen_check.stderr.exp b/memcheck/tests/buflen_check.stderr.exp
new file mode 100644
index 0000000..a1b9b36
--- /dev/null
+++ b/memcheck/tests/buflen_check.stderr.exp
@@ -0,0 +1,20 @@
+
+Syscall param socketcall.getsockname(name) contains unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Syscall param socketcall.getsockname(namelen_in) contains uninitialised or unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+getsockname(1) failed
+getsockname(2) failed
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/buflen_check.stderr.exp.hd b/memcheck/tests/buflen_check.stderr.exp.hd
new file mode 100644
index 0000000..855f51e
--- /dev/null
+++ b/memcheck/tests/buflen_check.stderr.exp.hd
@@ -0,0 +1,14 @@
+
+Syscall param socketcall.getsockname(name) contains unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+getsockname(1) failed
+getsockname(2) failed
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/buflen_check.vgtest b/memcheck/tests/buflen_check.vgtest
new file mode 100644
index 0000000..e14c8f1
--- /dev/null
+++ b/memcheck/tests/buflen_check.vgtest
@@ -0,0 +1 @@
+prog: buflen_check
diff --git a/tests/doublefree.c b/memcheck/tests/doublefree.c
similarity index 100%
copy from tests/doublefree.c
copy to memcheck/tests/doublefree.c
diff --git a/memcheck/tests/doublefree.stderr.exp b/memcheck/tests/doublefree.stderr.exp
new file mode 100644
index 0000000..282523f
--- /dev/null
+++ b/memcheck/tests/doublefree.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (doublefree.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/doublefree)
+   Address 0x........ is 0 bytes inside a block of size 177 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (doublefree.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/doublefree)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 2 frees, 177 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/doublefree.vgtest b/memcheck/tests/doublefree.vgtest
new file mode 100644
index 0000000..9c0efac
--- /dev/null
+++ b/memcheck/tests/doublefree.vgtest
@@ -0,0 +1 @@
+prog: doublefree
diff --git a/tests/errs1.c b/memcheck/tests/errs1.c
similarity index 100%
copy from tests/errs1.c
copy to memcheck/tests/errs1.c
diff --git a/memcheck/tests/errs1.stderr.exp b/memcheck/tests/errs1.stderr.exp
new file mode 100644
index 0000000..2de4b48
--- /dev/null
+++ b/memcheck/tests/errs1.stderr.exp
@@ -0,0 +1,28 @@
+
+Invalid read of size 1
+   at 0x........: ddd (errs1.c:7)
+   by 0x........: bbb (errs1.c:9)
+   by 0x........: aaa (errs1.c:10)
+   by 0x........: main (errs1.c:17)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: zzzzzzz (errs1.c:12)
+   by 0x........: yyy (errs1.c:13)
+   by 0x........: xxx (errs1.c:14)
+
+Invalid write of size 1
+   at 0x........: ddd (errs1.c:7)
+   by 0x........: bbb (errs1.c:9)
+   by 0x........: aaa (errs1.c:10)
+   by 0x........: main (errs1.c:17)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: zzzzzzz (errs1.c:12)
+   by 0x........: yyy (errs1.c:13)
+   by 0x........: xxx (errs1.c:14)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/errs1.vgtest b/memcheck/tests/errs1.vgtest
new file mode 100644
index 0000000..fbe7c76
--- /dev/null
+++ b/memcheck/tests/errs1.vgtest
@@ -0,0 +1 @@
+prog: errs1
diff --git a/memcheck/tests/exitprog.c b/memcheck/tests/exitprog.c
new file mode 100644
index 0000000..3067216
--- /dev/null
+++ b/memcheck/tests/exitprog.c
@@ -0,0 +1,16 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define ZILLION 1000000
+
+int main ( void )
+{
+   int i;
+   char* a = malloc(ZILLION * sizeof(char));
+   for (i = 0; i <= ZILLION; i++) a[i] = 0;
+   a = (char*)177;
+   _exit(1);
+}
diff --git a/memcheck/tests/exitprog.stderr.exp b/memcheck/tests/exitprog.stderr.exp
new file mode 100644
index 0000000..1b30fe0
--- /dev/null
+++ b/memcheck/tests/exitprog.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid write of size 1
+   at 0x........: main (exitprog.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/exitprog)
+   Address 0x........ is 0 bytes after a block of size 1000000 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (exitprog.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/exitprog)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000000 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 1000000 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/exitprog.vgtest b/memcheck/tests/exitprog.vgtest
new file mode 100644
index 0000000..0095028
--- /dev/null
+++ b/memcheck/tests/exitprog.vgtest
@@ -0,0 +1 @@
+prog: exitprog
diff --git a/memcheck/tests/filter_leak_check_size b/memcheck/tests/filter_leak_check_size
new file mode 100755
index 0000000..03def89
--- /dev/null
+++ b/memcheck/tests/filter_leak_check_size
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+./filter_stderr | \
+sed "s/checked [0-9]\+ bytes./checked ... bytes./"
diff --git a/memcheck/tests/filter_stderr b/memcheck/tests/filter_stderr
new file mode 100755
index 0000000..0d5e763
--- /dev/null
+++ b/memcheck/tests/filter_stderr
@@ -0,0 +1,24 @@
+#! /bin/sh
+
+# Skip first four lines (valgrind intro)  
+# XXX: be more clever/subtle; eg. if there's just a 1-line error message
+# don't cut it
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic                             |
+
+# Anonymise addresses
+$dir/../filter_addresses                                |
+
+# Anonymise line numbers in vg_clientfuncs.c
+sed "s/vg_clientfuncs.c:[0-9]\+/vg_clientfuncs.c:.../"  |
+
+$dir/../filter_test_paths                               |
+
+# Anonymise paths like "(in /foo/bar/libc-baz.so)"
+sed "s/(in \/.*libc.*)$/(in \/...libc...)/"             |
+
+# Anonymise paths like "__libc_start_main (../foo/bar/libc-quux.c:129)"
+sed "s/__libc_\(.*\) (.*)$/__libc_\1 (...libc...)/"
+
diff --git a/tests/fprw.c b/memcheck/tests/fprw.c
similarity index 100%
copy from tests/fprw.c
copy to memcheck/tests/fprw.c
diff --git a/memcheck/tests/fprw.stderr.exp b/memcheck/tests/fprw.stderr.exp
new file mode 100644
index 0000000..2137572
--- /dev/null
+++ b/memcheck/tests/fprw.stderr.exp
@@ -0,0 +1,83 @@
+
+Use of uninitialised value of size 8
+   at 0x........: main (fprw.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 4
+   at 0x........: main (fprw.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 8
+   at 0x........: main (fprw.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 4
+   at 0x........: main (fprw.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid read of size 8
+   at 0x........: main (fprw.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 8 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid write of size 8
+   at 0x........: main (fprw.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 8 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid read of size 4
+   at 0x........: main (fprw.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid write of size 4
+   at 0x........: main (fprw.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid write of size 8
+   at 0x........: main (fprw.c:24)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 3 allocs, 3 frees, 16 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/fprw.vgtest b/memcheck/tests/fprw.vgtest
new file mode 100644
index 0000000..d44e04a
--- /dev/null
+++ b/memcheck/tests/fprw.vgtest
@@ -0,0 +1,2 @@
+vgopts: --single-step=yes
+prog:   fprw
diff --git a/tests/fwrite.c b/memcheck/tests/fwrite.c
similarity index 100%
copy from tests/fwrite.c
copy to memcheck/tests/fwrite.c
diff --git a/memcheck/tests/fwrite.stderr.exp b/memcheck/tests/fwrite.stderr.exp
new file mode 100644
index 0000000..9c26de2
--- /dev/null
+++ b/memcheck/tests/fwrite.stderr.exp
@@ -0,0 +1,16 @@
+
+Syscall param write(buf) contains uninitialised or unaddressable byte(s)
+   at 0x........: __libc_write (...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (fwrite.c:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/fwrite.stdout.exp b/memcheck/tests/fwrite.stdout.exp
new file mode 100644
index 0000000..cb43b5c
--- /dev/null
+++ b/memcheck/tests/fwrite.stdout.exp
Binary files differ
diff --git a/memcheck/tests/fwrite.vgtest b/memcheck/tests/fwrite.vgtest
new file mode 100644
index 0000000..f43efd0
--- /dev/null
+++ b/memcheck/tests/fwrite.vgtest
@@ -0,0 +1 @@
+prog: fwrite
diff --git a/memcheck/tests/inits.c b/memcheck/tests/inits.c
new file mode 100644
index 0000000..7dd0c93
--- /dev/null
+++ b/memcheck/tests/inits.c
@@ -0,0 +1,20 @@
+
+#include <stdio.h>
+
+/* Static and global vars are inited to zero, non-static local vars aren't. */
+
+int        g;
+static int gs;
+
+int main(void)
+{
+   int        l;
+   static int ls;
+   
+   if (gs == 0xDEADBEEF) printf("1!\n");
+   if (g  == 0xDEADBEEF) printf("2!\n");
+   if (ls == 0xDEADBEEF) printf("3!\n");
+   if (l  == 0xDEADBEEF) printf("4!\n");  // complains
+   
+   return 0;
+}
diff --git a/memcheck/tests/inits.stderr.exp b/memcheck/tests/inits.stderr.exp
new file mode 100644
index 0000000..e703ced
--- /dev/null
+++ b/memcheck/tests/inits.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (inits.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/inits)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/inits.vgtest b/memcheck/tests/inits.vgtest
new file mode 100644
index 0000000..e654dc6
--- /dev/null
+++ b/memcheck/tests/inits.vgtest
@@ -0,0 +1 @@
+prog: inits
diff --git a/memcheck/tests/inline.c b/memcheck/tests/inline.c
new file mode 100644
index 0000000..cb023b2
--- /dev/null
+++ b/memcheck/tests/inline.c
@@ -0,0 +1,21 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+
+__inline__
+static int addemup ( int* arr )
+{
+   int i, j = 0;
+   for (i = 0; i <= 10; i++)
+      j += arr[i];
+   return j;
+}
+
+int main ( void )
+{
+   int sum;
+   int* a = calloc(10, sizeof(int));
+   sum = addemup(a);
+   printf("sum is %d\n", sum);
+   return 0;
+}
diff --git a/memcheck/tests/inline.stderr.exp b/memcheck/tests/inline.stderr.exp
new file mode 100644
index 0000000..a2225c1
--- /dev/null
+++ b/memcheck/tests/inline.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid read of size 4
+   at 0x........: addemup (inline.c:10)
+   by 0x........: main (inline.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: calloc@@GLIBC_2.0 (in /.../tests/memcheck/inline)
+   Address 0x........ is 0 bytes after a block of size 40 alloc'd
+   at 0x........: calloc (vg_clientfuncs.c:...)
+   by 0x........: main (inline.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: calloc@@GLIBC_2.0 (in /.../tests/memcheck/inline)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 40 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 40 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/inline.stdout.exp b/memcheck/tests/inline.stdout.exp
new file mode 100644
index 0000000..ad1401e
--- /dev/null
+++ b/memcheck/tests/inline.stdout.exp
@@ -0,0 +1 @@
+sum is 0
diff --git a/memcheck/tests/inline.vgtest b/memcheck/tests/inline.vgtest
new file mode 100644
index 0000000..89673b1
--- /dev/null
+++ b/memcheck/tests/inline.vgtest
@@ -0,0 +1 @@
+prog: inline
diff --git a/tests/malloc1.c b/memcheck/tests/malloc1.c
similarity index 100%
copy from tests/malloc1.c
copy to memcheck/tests/malloc1.c
diff --git a/memcheck/tests/malloc1.stderr.exp b/memcheck/tests/malloc1.stderr.exp
new file mode 100644
index 0000000..1e4c67f
--- /dev/null
+++ b/memcheck/tests/malloc1.stderr.exp
@@ -0,0 +1,28 @@
+
+Invalid write of size 1
+   at 0x........: really (malloc1.c:20)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc1)
+   Address 0x........ is 1 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: really (malloc1.c:19)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+
+Invalid write of size 1
+   at 0x........: really (malloc1.c:23)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc1)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: really (malloc1.c:21)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 2 allocs, 1 frees, 20 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/malloc1.vgtest b/memcheck/tests/malloc1.vgtest
new file mode 100644
index 0000000..43c402a
--- /dev/null
+++ b/memcheck/tests/malloc1.vgtest
@@ -0,0 +1 @@
+prog: malloc1
diff --git a/memcheck/tests/malloc2.c b/memcheck/tests/malloc2.c
new file mode 100644
index 0000000..44cc7bb
--- /dev/null
+++ b/memcheck/tests/malloc2.c
@@ -0,0 +1,49 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The original test driver machinery. */
+#define N_TEST_TRANSACTIONS 500
+#define N_TEST_ARR 2000
+
+#define M_TEST_MALLOC 1000
+
+void* test_arr[N_TEST_ARR];
+
+int main ( int argc, char** argv )
+{
+   int i, j, k, nbytes;
+   unsigned char* chp;
+
+   for (i = 0; i < N_TEST_ARR; i++)
+      test_arr[i] = NULL;
+
+   for (i = 0; i < N_TEST_TRANSACTIONS; i++) {
+      j = random() % N_TEST_ARR;
+      if (test_arr[j]) {
+         free(test_arr[j]);
+         test_arr[j] = NULL;
+      } else {
+         nbytes = 1 + random() % M_TEST_MALLOC;
+         if (random()%64 == 32) 
+            nbytes *= 17;
+         test_arr[j] = malloc( nbytes );
+         chp = test_arr[j];
+         for (k = 1; k < nbytes; k++) 
+            chp[k] = (unsigned char)(k + 99);
+      }
+   }
+
+   for (i = 0; test_arr[i] == NULL; i++) ;
+   free(test_arr[i]);
+   ((char*)test_arr[i])[0] = 0;
+
+   for (i = 0; i < N_TEST_ARR; i++) {
+      if (test_arr[i]) {
+         free(test_arr[i]);
+         test_arr[i] = NULL;
+      }
+   }
+
+   return 0;
+}
diff --git a/memcheck/tests/malloc2.stderr.exp b/memcheck/tests/malloc2.stderr.exp
new file mode 100644
index 0000000..dd86b2d
--- /dev/null
+++ b/memcheck/tests/malloc2.stderr.exp
@@ -0,0 +1,27 @@
+
+Invalid write of size 1
+   at 0x........: main (malloc2.c:39)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+   Address 0x........ is 0 bytes inside a block of size 429 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:38)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:43)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+   Address 0x........ is 0 bytes inside a block of size 429 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:38)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 443 allocs, 444 frees, 265463 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/malloc2.vgtest b/memcheck/tests/malloc2.vgtest
new file mode 100644
index 0000000..d2dd1b4
--- /dev/null
+++ b/memcheck/tests/malloc2.vgtest
@@ -0,0 +1 @@
+prog: malloc2
diff --git a/memcheck/tests/manuel1.c b/memcheck/tests/manuel1.c
new file mode 100644
index 0000000..ac1f3c8
--- /dev/null
+++ b/memcheck/tests/manuel1.c
@@ -0,0 +1,10 @@
+#include <stdio.h>
+
+int main ()
+{
+  int x;
+
+  printf ("x = %d\n", x==0xDEADBEEF ? 99 : 88);
+
+  return 0;
+}
diff --git a/memcheck/tests/manuel1.stderr.exp b/memcheck/tests/manuel1.stderr.exp
new file mode 100644
index 0000000..c674937
--- /dev/null
+++ b/memcheck/tests/manuel1.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (manuel1.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/manuel1)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/manuel1.stdout.exp b/memcheck/tests/manuel1.stdout.exp
new file mode 100644
index 0000000..d26cbc9
--- /dev/null
+++ b/memcheck/tests/manuel1.stdout.exp
@@ -0,0 +1 @@
+x = 88
diff --git a/memcheck/tests/manuel1.vgtest b/memcheck/tests/manuel1.vgtest
new file mode 100644
index 0000000..e3ad9c7
--- /dev/null
+++ b/memcheck/tests/manuel1.vgtest
@@ -0,0 +1 @@
+prog: manuel1
diff --git a/memcheck/tests/manuel2.c b/memcheck/tests/manuel2.c
new file mode 100644
index 0000000..3b7135e
--- /dev/null
+++ b/memcheck/tests/manuel2.c
@@ -0,0 +1,11 @@
+#include <stdio.h>
+#include <malloc.h>
+
+int main ()
+{
+  int *x;
+
+  printf ("x = %d\n", *x==0xDEADBEEF ? 99 : 88);
+
+  return 0;
+}
diff --git a/memcheck/tests/manuel2.stderr.exp b/memcheck/tests/manuel2.stderr.exp
new file mode 100644
index 0000000..55ff720
--- /dev/null
+++ b/memcheck/tests/manuel2.stderr.exp
@@ -0,0 +1,11 @@
+
+Use of uninitialised value of size 4
+   at 0x........: main (manuel2.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/manuel2)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/manuel2.stdout.exp b/memcheck/tests/manuel2.stdout.exp
new file mode 100644
index 0000000..d26cbc9
--- /dev/null
+++ b/memcheck/tests/manuel2.stdout.exp
@@ -0,0 +1 @@
+x = 88
diff --git a/memcheck/tests/manuel2.vgtest b/memcheck/tests/manuel2.vgtest
new file mode 100644
index 0000000..1c785a5
--- /dev/null
+++ b/memcheck/tests/manuel2.vgtest
@@ -0,0 +1 @@
+prog: manuel2
diff --git a/memcheck/tests/manuel3.c b/memcheck/tests/manuel3.c
new file mode 100644
index 0000000..ea98fa9
--- /dev/null
+++ b/memcheck/tests/manuel3.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <malloc.h>
+
+int gcc_cant_inline_me ( int );
+
+int main ()
+{
+  int *x, y;
+
+  x = (int *) malloc (sizeof (int));
+
+  y = *x == 173;
+
+  if (gcc_cant_inline_me(y)) { } 
+
+  return 0;
+}
+
+/* must be AFTER main */
+int gcc_cant_inline_me ( int n )
+{
+   if (n == 42) 
+      return 1; /* forty-two, dudes! */
+   else
+      return 0; /* some other number, dudes! */
+}
+
+
diff --git a/memcheck/tests/manuel3.stderr.exp b/memcheck/tests/manuel3.stderr.exp
new file mode 100644
index 0000000..7a257c3
--- /dev/null
+++ b/memcheck/tests/manuel3.stderr.exp
@@ -0,0 +1,12 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: gcc_cant_inline_me (manuel3.c:22)
+   by 0x........: main (manuel3.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 4 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/manuel3.vgtest b/memcheck/tests/manuel3.vgtest
new file mode 100644
index 0000000..0481cc6
--- /dev/null
+++ b/memcheck/tests/manuel3.vgtest
@@ -0,0 +1 @@
+prog: manuel3
diff --git a/tests/memalign_test.c b/memcheck/tests/memalign_test.c
similarity index 100%
copy from tests/memalign_test.c
copy to memcheck/tests/memalign_test.c
diff --git a/memcheck/tests/memalign_test.stderr.exp b/memcheck/tests/memalign_test.stderr.exp
new file mode 100644
index 0000000..4725928
--- /dev/null
+++ b/memcheck/tests/memalign_test.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (memalign_test.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: valloc@@GLIBC_2.0 (in /.../tests/memcheck/memalign_test)
+   Address 0x........ is 0 bytes inside a block of size 111110 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (memalign_test.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: valloc@@GLIBC_2.0 (in /.../tests/memcheck/memalign_test)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 10 allocs, 11 frees, 611105 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/memalign_test.vgtest b/memcheck/tests/memalign_test.vgtest
new file mode 100644
index 0000000..56b601c
--- /dev/null
+++ b/memcheck/tests/memalign_test.vgtest
@@ -0,0 +1 @@
+prog: memalign_test
diff --git a/memcheck/tests/memcmptest.c b/memcheck/tests/memcmptest.c
new file mode 100644
index 0000000..83eb2d4
--- /dev/null
+++ b/memcheck/tests/memcmptest.c
@@ -0,0 +1,20 @@
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+char* s1;
+char* s2;
+
+int main ( void )
+{
+  s1 = malloc(10); strcpy(s1,"fooble");
+  s2 = malloc(10); strcpy(s2,"fooble");
+  if (memcmp(s1, s2, 8) != 0)
+    printf("different\n");
+  else
+    printf("same (?!)\n");
+  return 0;
+}
+
+	
diff --git a/memcheck/tests/memcmptest.stderr.exp b/memcheck/tests/memcmptest.stderr.exp
new file mode 100644
index 0000000..d7b1c3a
--- /dev/null
+++ b/memcheck/tests/memcmptest.stderr.exp
@@ -0,0 +1,16 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: memcmp (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/memcmptest)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: memcmp (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/memcmptest)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 20 bytes in 2 blocks.
+malloc/free: 2 allocs, 0 frees, 20 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/memcmptest.stdout.exp b/memcheck/tests/memcmptest.stdout.exp
new file mode 100644
index 0000000..7164804
--- /dev/null
+++ b/memcheck/tests/memcmptest.stdout.exp
@@ -0,0 +1 @@
+same (?!)
diff --git a/memcheck/tests/memcmptest.vgtest b/memcheck/tests/memcmptest.vgtest
new file mode 100644
index 0000000..f31a8f5
--- /dev/null
+++ b/memcheck/tests/memcmptest.vgtest
@@ -0,0 +1 @@
+prog: memcmptest
diff --git a/memcheck/tests/mismatches.cpp b/memcheck/tests/mismatches.cpp
new file mode 100644
index 0000000..857a075
--- /dev/null
+++ b/memcheck/tests/mismatches.cpp
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+
+int main()
+{
+  int* fpointer = (int*)malloc(10);
+  delete fpointer;          // should give warning
+  fpointer = (int*)malloc(10);
+  delete [] fpointer;       // should give warning
+  fpointer = (int*)malloc(10);
+  free (fpointer);          // should work!
+
+  int* nvec = new int[10];
+  delete nvec;              // should give a warning
+  nvec = new int[10];
+  free (nvec);              // should give a warning
+  nvec = new int[10];
+  delete [] nvec;           // should work!
+
+  int* n = new int;
+  delete [] n;              // should give a warning
+  n = new int;
+  free(n);                  // should give a warning
+  n = new int;
+  delete n;                 // should work!
+
+  return 0;
+}
diff --git a/memcheck/tests/mismatches.stderr.exp b/memcheck/tests/mismatches.stderr.exp
new file mode 100644
index 0000000..caf65dc
--- /dev/null
+++ b/memcheck/tests/mismatches.stderr.exp
@@ -0,0 +1,72 @@
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:5)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_vec_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:13)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 40 alloc'd
+   at 0x........: __builtin_vec_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 40 alloc'd
+   at 0x........: __builtin_vec_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_vec_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: __builtin_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: __builtin_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 9 allocs, 9 frees, 162 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/mismatches.vgtest b/memcheck/tests/mismatches.vgtest
new file mode 100644
index 0000000..3a87ef0
--- /dev/null
+++ b/memcheck/tests/mismatches.vgtest
@@ -0,0 +1 @@
+prog: mismatches
diff --git a/memcheck/tests/mmaptest.c b/memcheck/tests/mmaptest.c
new file mode 100644
index 0000000..74a21ed
--- /dev/null
+++ b/memcheck/tests/mmaptest.c
@@ -0,0 +1,15 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+int main()
+{
+    int fd;
+
+    mkdir("dir", 0777);
+    fd = open("dir", O_RDONLY);
+    mmap(NULL, 4711, PROT_READ, MAP_PRIVATE, fd, 0);
+    return 0;
+}
diff --git a/memcheck/tests/mmaptest.stderr.exp b/memcheck/tests/mmaptest.stderr.exp
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/memcheck/tests/mmaptest.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/mmaptest.vgtest b/memcheck/tests/mmaptest.vgtest
new file mode 100644
index 0000000..1540c20
--- /dev/null
+++ b/memcheck/tests/mmaptest.vgtest
@@ -0,0 +1 @@
+prog: mmaptest
diff --git a/tests/nanoleak.c b/memcheck/tests/nanoleak.c
similarity index 100%
copy from tests/nanoleak.c
copy to memcheck/tests/nanoleak.c
diff --git a/memcheck/tests/nanoleak.stderr.exp b/memcheck/tests/nanoleak.stderr.exp
new file mode 100644
index 0000000..8dc3ae7
--- /dev/null
+++ b/memcheck/tests/nanoleak.stderr.exp
@@ -0,0 +1,26 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 1000 bytes allocated.
+For counts of detected errors, rerun with: -v
+searching for pointers to 1 not-freed blocks.
+checked ... bytes.
+
+definitely lost: 1000 bytes in 1 blocks.
+possibly lost:   0 bytes in 0 blocks.
+still reachable: 0 bytes in 0 blocks.
+
+1000 bytes in 1 blocks are definitely lost in loss record 1 of 1
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (nanoleak.c:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+LEAK SUMMARY:
+   definitely lost: 1000 bytes in 1 blocks.
+   possibly lost:   0 bytes in 0 blocks.
+   still reachable: 0 bytes in 0 blocks.
+Reachable blocks (those to which a pointer was found) are not shown.
+To see them, rerun with: --show-reachable=yes
+
diff --git a/memcheck/tests/nanoleak.vgtest b/memcheck/tests/nanoleak.vgtest
new file mode 100644
index 0000000..2fadc98
--- /dev/null
+++ b/memcheck/tests/nanoleak.vgtest
@@ -0,0 +1,3 @@
+vgopts: --leak-check=yes
+prog: nanoleak
+stderr_filter: filter_leak_check_size
diff --git a/memcheck/tests/new_override.cpp b/memcheck/tests/new_override.cpp
new file mode 100644
index 0000000..5b48611
--- /dev/null
+++ b/memcheck/tests/new_override.cpp
@@ -0,0 +1,30 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+class Test {
+public:
+  int a, b, c, d;
+};
+
+void *operator new(size_t size)
+{
+  void *ret = malloc(size);
+  printf("Here.\n");
+  for (unsigned int i = 0; i < size; i++) ((char *) ret)[i] = 0xFF;
+  return ret;
+}
+
+int main(int argc, char *argv[]) {
+  Test *toto;
+  int i;
+  int j = 0;
+
+  toto = new Test[2];
+
+  for (i = 0; i < 2; i++) {
+    if (toto[i].a) {
+      j++;
+    }
+    //printf("%d : %08x %08x %08x %08x\n", i, toto[i].a, toto[i].b, toto[i].c, toto[i].d);
+  }
+}
diff --git a/memcheck/tests/new_override.stderr.exp b/memcheck/tests/new_override.stderr.exp
new file mode 100644
index 0000000..8ba31b6
--- /dev/null
+++ b/memcheck/tests/new_override.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (new_override.cpp:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/new_override)
+
+ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 32 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 32 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/new_override.vgtest b/memcheck/tests/new_override.vgtest
new file mode 100644
index 0000000..4f11a5b
--- /dev/null
+++ b/memcheck/tests/new_override.vgtest
@@ -0,0 +1 @@
+prog: new_override
diff --git a/memcheck/tests/pushfpopf.stderr.exp b/memcheck/tests/pushfpopf.stderr.exp
new file mode 100644
index 0000000..8e10fb2
--- /dev/null
+++ b/memcheck/tests/pushfpopf.stderr.exp
@@ -0,0 +1,12 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: fooble (in /.../tests/memcheck/pushfpopf)
+   by 0x........: main (pushfpopf_c.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/pushfpopf)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/pushfpopf.stdout.exp b/memcheck/tests/pushfpopf.stdout.exp
new file mode 100644
index 0000000..180f871
--- /dev/null
+++ b/memcheck/tests/pushfpopf.stdout.exp
@@ -0,0 +1 @@
+fooble: result is 22
diff --git a/memcheck/tests/pushfpopf.vgtest b/memcheck/tests/pushfpopf.vgtest
new file mode 100644
index 0000000..f87b791
--- /dev/null
+++ b/memcheck/tests/pushfpopf.vgtest
@@ -0,0 +1 @@
+prog: pushfpopf
diff --git a/tests/pushfpopf_c.c b/memcheck/tests/pushfpopf_c.c
similarity index 100%
copy from tests/pushfpopf_c.c
copy to memcheck/tests/pushfpopf_c.c
diff --git a/tests/pushfpopf.s b/memcheck/tests/pushfpopf_s.s
similarity index 100%
copy from tests/pushfpopf.s
copy to memcheck/tests/pushfpopf_s.s
diff --git a/tests/realloc1.c b/memcheck/tests/realloc1.c
similarity index 100%
copy from tests/realloc1.c
copy to memcheck/tests/realloc1.c
diff --git a/memcheck/tests/realloc1.stderr.exp b/memcheck/tests/realloc1.stderr.exp
new file mode 100644
index 0000000..14ec594
--- /dev/null
+++ b/memcheck/tests/realloc1.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 49 bytes in 1 blocks.
+malloc/free: 49 allocs, 48 frees, 1225 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/realloc1.vgtest b/memcheck/tests/realloc1.vgtest
new file mode 100644
index 0000000..d0d10d7
--- /dev/null
+++ b/memcheck/tests/realloc1.vgtest
@@ -0,0 +1 @@
+prog: realloc1
diff --git a/memcheck/tests/realloc2.c b/memcheck/tests/realloc2.c
new file mode 100644
index 0000000..c89ff8a
--- /dev/null
+++ b/memcheck/tests/realloc2.c
@@ -0,0 +1,21 @@
+/* This test demonstrated an obscure bug in malloclists handling caused by
+   multiple blocks hashing to the same list and one being overwritten at
+   realloc time due to bad ordering of the things happening.  Now runs
+   without error. */
+
+#include <malloc.h>
+#include <stdio.h>
+
+int main ( void )
+{
+  char* p;
+  int i;
+  for (i = 0; i < 10000; i++) {
+    p = malloc(10 + 10 * (i % 100));
+    p = realloc(p, 500);
+    p = realloc(p, 600);
+    free(p);
+  }
+  return 0;
+}
+
diff --git a/memcheck/tests/realloc2.stderr.exp b/memcheck/tests/realloc2.stderr.exp
new file mode 100644
index 0000000..8ed8426
--- /dev/null
+++ b/memcheck/tests/realloc2.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 30000 allocs, 30000 frees, 16050000 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/realloc2.vgtest b/memcheck/tests/realloc2.vgtest
new file mode 100644
index 0000000..0a28b23
--- /dev/null
+++ b/memcheck/tests/realloc2.vgtest
@@ -0,0 +1 @@
+prog: realloc2
diff --git a/tests/sigaltstack.c b/memcheck/tests/sigaltstack.c
similarity index 100%
copy from tests/sigaltstack.c
copy to memcheck/tests/sigaltstack.c
diff --git a/memcheck/tests/sigaltstack.stderr.exp b/memcheck/tests/sigaltstack.stderr.exp
new file mode 100644
index 0000000..ceeb462
--- /dev/null
+++ b/memcheck/tests/sigaltstack.stderr.exp
@@ -0,0 +1,19 @@
+
+calling sigaltstack, stack base is 0x........
+setting sigaction
+Syscall param sigaction(act) contains uninitialised or unaddressable byte(s)
+   at 0x........: __libc_sigaction (...libc...)
+   by 0x........: main (sigaltstack.c:27)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: sigaltstack@@GLIBC_2.0 (in /.../tests/memcheck/sigaltstack)
+   Address 0x........ is on thread 1's stack
+res = 0
+raising the signal
+caught signal, local var is on 0x........
+done
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8192 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8192 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/sigaltstack.vgtest b/memcheck/tests/sigaltstack.vgtest
new file mode 100644
index 0000000..f61eeae
--- /dev/null
+++ b/memcheck/tests/sigaltstack.vgtest
@@ -0,0 +1 @@
+prog: sigaltstack
diff --git a/memcheck/tests/signal2.c b/memcheck/tests/signal2.c
new file mode 100644
index 0000000..a1df705
--- /dev/null
+++ b/memcheck/tests/signal2.c
@@ -0,0 +1,20 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+void sig_hdlr ( int signo )
+{
+   printf ( "caught sig segv\n" );
+   exit(1);
+}
+
+int main ( void )
+{
+   printf ( "installing sig handler\n" );
+   signal(SIGSEGV, sig_hdlr);
+   printf ( "doing bad thing\n" );
+   * (int*) 65536 = 0;
+   printf ( "exited normally ?!\n" );
+   return 0;
+}
diff --git a/memcheck/tests/signal2.stderr.exp b/memcheck/tests/signal2.stderr.exp
new file mode 100644
index 0000000..3ab7302
--- /dev/null
+++ b/memcheck/tests/signal2.stderr.exp
@@ -0,0 +1,12 @@
+
+Invalid write of size 4
+   at 0x........: main (signal2.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: exit@@GLIBC_2.0 (in /.../tests/memcheck/signal2)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/signal2.stderr.exp.hd b/memcheck/tests/signal2.stderr.exp.hd
new file mode 100644
index 0000000..2cd1fc8
--- /dev/null
+++ b/memcheck/tests/signal2.stderr.exp.hd
@@ -0,0 +1,12 @@
+
+Invalid write of size 4
+   at 0x........: main (signal2.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: exit@@GLIBC_2.0 (in /.../tests/memcheck/signal2)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/signal2.stdout.exp b/memcheck/tests/signal2.stdout.exp
new file mode 100644
index 0000000..3e16af0
--- /dev/null
+++ b/memcheck/tests/signal2.stdout.exp
@@ -0,0 +1,3 @@
+installing sig handler
+doing bad thing
+caught sig segv
diff --git a/memcheck/tests/signal2.vgtest b/memcheck/tests/signal2.vgtest
new file mode 100644
index 0000000..c301370
--- /dev/null
+++ b/memcheck/tests/signal2.vgtest
@@ -0,0 +1 @@
+prog: signal2
diff --git a/memcheck/tests/supp.c b/memcheck/tests/supp.c
new file mode 100644
index 0000000..50c4a81
--- /dev/null
+++ b/memcheck/tests/supp.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+
+int
+main ()
+{
+  int x;
+
+  if (x == 0)
+     return 0;
+  else
+     return 1;
+}
diff --git a/memcheck/tests/supp.supp b/memcheck/tests/supp.supp
new file mode 100644
index 0000000..477c6c4
--- /dev/null
+++ b/memcheck/tests/supp.supp
@@ -0,0 +1,6 @@
+{
+  name_of_this_suppression
+  Cond
+  obj:*supp1
+  fun:__libc_start_main
+}
diff --git a/memcheck/tests/supp1.stderr.exp b/memcheck/tests/supp1.stderr.exp
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/memcheck/tests/supp1.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/supp1.vgtest b/memcheck/tests/supp1.vgtest
new file mode 100644
index 0000000..31130ff
--- /dev/null
+++ b/memcheck/tests/supp1.vgtest
@@ -0,0 +1,2 @@
+vgopts: --suppressions=supp.supp
+prog: supp1
diff --git a/memcheck/tests/supp2.stderr.exp b/memcheck/tests/supp2.stderr.exp
new file mode 100644
index 0000000..b245f04
--- /dev/null
+++ b/memcheck/tests/supp2.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (supp.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/supp2.vgtest b/memcheck/tests/supp2.vgtest
new file mode 100644
index 0000000..f5200b0
--- /dev/null
+++ b/memcheck/tests/supp2.vgtest
@@ -0,0 +1,2 @@
+vgopts: --suppressions=supp.supp
+prog: supp2
diff --git a/tests/suppfree.c b/memcheck/tests/suppfree.c
similarity index 100%
copy from tests/suppfree.c
copy to memcheck/tests/suppfree.c
diff --git a/memcheck/tests/suppfree.stderr.exp b/memcheck/tests/suppfree.stderr.exp
new file mode 100644
index 0000000..149bf84
--- /dev/null
+++ b/memcheck/tests/suppfree.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: ddd (suppfree.c:7)
+   by 0x........: ccc (suppfree.c:12)
+   by 0x........: bbb (suppfree.c:17)
+   Address 0x........ is 0 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: ddd (suppfree.c:6)
+   by 0x........: ccc (suppfree.c:12)
+   by 0x........: bbb (suppfree.c:17)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 2 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/suppfree.vgtest b/memcheck/tests/suppfree.vgtest
new file mode 100644
index 0000000..bd38f8a
--- /dev/null
+++ b/memcheck/tests/suppfree.vgtest
@@ -0,0 +1 @@
+prog: suppfree
diff --git a/memcheck/tests/trivialleak.c b/memcheck/tests/trivialleak.c
new file mode 100644
index 0000000..f3a8963
--- /dev/null
+++ b/memcheck/tests/trivialleak.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+
+static void test()
+  {
+    void* leak;
+    int i;
+    for (i = 0; i < 1000; i++)
+       leak = (void*)malloc( 1 );
+  }
+  int main()
+  {
+    test();
+    return 0;
+  }
diff --git a/memcheck/tests/trivialleak.stderr.exp b/memcheck/tests/trivialleak.stderr.exp
new file mode 100644
index 0000000..12bb84b
--- /dev/null
+++ b/memcheck/tests/trivialleak.stderr.exp
@@ -0,0 +1,26 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000 bytes in 1000 blocks.
+malloc/free: 1000 allocs, 0 frees, 1000 bytes allocated.
+For counts of detected errors, rerun with: -v
+searching for pointers to 1000 not-freed blocks.
+checked ... bytes.
+
+definitely lost: 1000 bytes in 1000 blocks.
+possibly lost:   0 bytes in 0 blocks.
+still reachable: 0 bytes in 0 blocks.
+
+1000 bytes in 1000 blocks are definitely lost in loss record 1 of 1
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: test (trivialleak.c:8)
+   by 0x........: main (trivialleak.c:13)
+   by 0x........: __libc_start_main (...libc...)
+
+LEAK SUMMARY:
+   definitely lost: 1000 bytes in 1000 blocks.
+   possibly lost:   0 bytes in 0 blocks.
+   still reachable: 0 bytes in 0 blocks.
+Reachable blocks (those to which a pointer was found) are not shown.
+To see them, rerun with: --show-reachable=yes
+
diff --git a/memcheck/tests/trivialleak.vgtest b/memcheck/tests/trivialleak.vgtest
new file mode 100644
index 0000000..c5b68a8
--- /dev/null
+++ b/memcheck/tests/trivialleak.vgtest
@@ -0,0 +1,3 @@
+vgopts: --leak-check=yes
+prog: trivialleak
+stderr_filter: filter_leak_check_size
diff --git a/memcheck/tests/tronical.S b/memcheck/tests/tronical.S
new file mode 100644
index 0000000..030a2af
--- /dev/null
+++ b/memcheck/tests/tronical.S
@@ -0,0 +1,102 @@
+/*
+
+Assembly derived from the following program compiled with -O2.
+This fools Valgrind, causing it to give a false error.
+
+#include <stdio.h>
+
+struct Foo
+{
+    int a1 : 1;
+    int a2 : 1;
+    int a3 : 1;
+    int a4 : 1;
+    int a5 : 1;
+    int a6 : 1;
+    int a7 : 1;
+    int bleh : 1;
+};
+
+struct Foo* foo;
+
+void set()
+{
+    foo->bleh = 1;
+}
+
+void get()
+{
+    if ( foo->bleh == 0 )
+        printf( "blieb\n" );
+}
+
+int main()
+{
+  foo = malloc(sizeof(struct Foo));
+    set();
+
+    get();
+
+    return 0;
+}
+
+*/
+
+	.file	"tronical.c"
+	.version	"01.01"
+gcc2_compiled.:
+.text
+	.align 4
+.globl set
+	.type	 set,@function
+set:
+	pushl	%ebp
+	movl	foo, %eax
+	orb	$128, (%eax)
+	movl	%esp, %ebp
+	popl	%ebp
+	ret
+.Lfe1:
+	.size	 set,.Lfe1-set
+	.section	.rodata.str1.1,"ams",@progbits,1
+.LC0:
+	.string	"blieb\n"
+.text
+	.align 4
+.globl get
+	.type	 get,@function
+get:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$8, %esp
+	movl	foo, %eax
+	cmpb	$0, (%eax)
+	js	.L4
+	subl	$12, %esp
+	pushl	$.LC0
+	call	printf
+	addl	$16, %esp
+.L4:
+	leave
+	ret
+.Lfe2:
+	.size	 get,.Lfe2-get
+	.align 4
+.globl main
+	.type	 main,@function
+main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$20, %esp
+	pushl	$4
+	call	malloc
+	movl	%eax, foo
+	call	set
+	call	get
+	xorl	%eax, %eax
+	leave
+	ret
+.Lfe3:
+	.size	 main,.Lfe3-main
+	.comm	foo,4,4
+	.ident	"GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)"
diff --git a/memcheck/tests/tronical.stderr.exp b/memcheck/tests/tronical.stderr.exp
new file mode 100644
index 0000000..2c63087
--- /dev/null
+++ b/memcheck/tests/tronical.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 4 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/tronical.vgtest b/memcheck/tests/tronical.vgtest
new file mode 100644
index 0000000..97623ad
--- /dev/null
+++ b/memcheck/tests/tronical.vgtest
@@ -0,0 +1 @@
+prog: tronical
diff --git a/memcheck/tests/weirdioctl.c b/memcheck/tests/weirdioctl.c
new file mode 100644
index 0000000..a78de65
--- /dev/null
+++ b/memcheck/tests/weirdioctl.c
@@ -0,0 +1,44 @@
+
+/* A program which sets a readable fd to have a timeout, and therefore
+   needs --weird-hacks=ioctl-VTIME in order to run without
+   blocking. */
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <termio.h>
+
+int main ( void )
+{
+   int c, i;
+   int res;
+         struct termio tty, oldtty;
+
+          /**
+           ** Save the old tty settings, and get rid of echo
+           ** for the new tty settings
+           **/
+          ioctl(0, TCGETA, &oldtty);
+          tty = oldtty;
+          tty.c_lflag    &= ~(ICANON|ECHO|ECHOE|ECHOK|ECHONL);
+          tty.c_cc[VMIN]  = 0;
+          tty.c_cc[VTIME] = 5;
+          res = ioctl(0, TCSETA, &tty);
+	  printf("first ioctl returned %d\n", res);
+
+          /**
+           ** Now do whatever stuff you want non-echoed
+           **/
+          i = 0;
+	  while (i++ < 50) {
+	    c = getchar();
+	    printf("got %d\n", c);
+	  }
+
+          /**
+           ** Now reset the old settings
+           **/
+          res = ioctl(0, TCSETA, &oldtty);
+	  printf("second ioctl returned %d\n", res);
+
+return 0;
+}
diff --git a/memcheck/tests/weirdioctl.stderr.exp b/memcheck/tests/weirdioctl.stderr.exp
new file mode 100644
index 0000000..7d5c9aa
--- /dev/null
+++ b/memcheck/tests/weirdioctl.stderr.exp
@@ -0,0 +1,12 @@
+
+Syscall param ioctl(TCSET{A,AW,AF}) contains uninitialised or unaddressable byte(s)
+   at 0x........: __ioctl (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ioctl@@GLIBC_2.0 (in /.../tests/memcheck/weirdioctl)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/memcheck/tests/weirdioctl.stdout.exp b/memcheck/tests/weirdioctl.stdout.exp
new file mode 100644
index 0000000..bb65b7c
--- /dev/null
+++ b/memcheck/tests/weirdioctl.stdout.exp
@@ -0,0 +1,52 @@
+first ioctl returned -1
+got 118
+got 103
+got 111
+got 112
+got 116
+got 115
+got 58
+got 32
+got 45
+got 45
+got 119
+got 101
+got 105
+got 114
+got 100
+got 45
+got 104
+got 97
+got 99
+got 107
+got 115
+got 61
+got 105
+got 111
+got 99
+got 116
+got 108
+got 45
+got 86
+got 84
+got 73
+got 77
+got 69
+got 10
+got 112
+got 114
+got 111
+got 103
+got 58
+got 32
+got 32
+got 32
+got 119
+got 101
+got 105
+got 114
+got 100
+got 105
+got 111
+got 99
+second ioctl returned -1
diff --git a/memcheck/tests/weirdioctl.vgtest b/memcheck/tests/weirdioctl.vgtest
new file mode 100644
index 0000000..e8d8630
--- /dev/null
+++ b/memcheck/tests/weirdioctl.vgtest
@@ -0,0 +1,3 @@
+vgopts: --weird-hacks=ioctl-VTIME
+prog:   weirdioctl
+args:   < weirdioctl.vgtest
diff --git a/none/Makefile.am b/none/Makefile.am
index 60553dd..96911ed 100644
--- a/none/Makefile.am
+++ b/none/Makefile.am
@@ -1,15 +1,17 @@
+
+
 SUBDIRS = demangle . docs tests
 
 CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \
-		-Winline -Wall -Wshadow -O -fomit-frame-pointer -g
+		-Winline -Wall -Wshadow -O -fomit-frame-pointer @PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
 
-LDFLAGS = -Wl,-z -Wl,initfirst
+#LDFLAGS = -Wl,-z -Wl,initfirst
 
 INCLUDES = -I$(srcdir)/demangle
 
-bin_SCRIPTS = valgrind cachegrind vg_annotate
+bin_SCRIPTS = valgrind vg_annotate
 
 SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp
 
@@ -26,60 +28,103 @@
 	PATCHES_APPLIED ACKNOWLEDGEMENTS \
 	README_KDE3_FOLKS README_PACKAGERS \
 	README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \
-	valgrind.spec valgrind.spec.in
+	valgrind.spec valgrind.spec.in \
+	vg_profile.c \
+	vg_cachesim_I1.c vg_cachesim_D1.c vg_cachesim_L2.c vg_cachesim_gen.c
 
-val_PROGRAMS = valgrind.so valgrinq.so libpthread.so
+val_PROGRAMS = \
+	valgrind.so \
+	valgrinq.so \
+	libpthread.so \
+	vgskin_memcheck.so \
+	vgskin_cachesim.so \
+	vgskin_eraser.so \
+	vgskin_addrcheck.so \
+	vgskin_none.so \
+	vgskin_lackey.so \
+	vgskin_corecheck.so
 
-libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR -shared -fpic -Wl,-version-script $(srcdir)/vg_libpthread.vs
 
 valgrinq_so_SOURCES = vg_valgrinq_dummy.c
+valgrinq_so_LDFLAGS = -shared
 
 valgrind_so_SOURCES = \
 	vg_clientfuncs.c \
 	vg_scheduler.c \
-        vg_cachesim.c \
 	vg_clientmalloc.c \
-	vg_clientperms.c \
+	vg_default.c \
 	vg_demangle.c \
 	vg_dispatch.S \
 	vg_errcontext.c \
 	vg_execontext.c \
 	vg_from_ucode.c \
 	vg_helpers.S \
+	vg_instrument.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
 	vg_messages.c \
 	vg_mylibc.c \
 	vg_procselfmaps.c \
-	vg_profile.c \
+	vg_dummy_profile.c \
 	vg_signals.c \
 	vg_startup.S \
 	vg_symtab2.c \
-	vg_syscall_mem.c \
+	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
 	vg_translate.c \
-	vg_transtab.c \
-	vg_vtagops.c
-
+	vg_transtab.c
+valgrind_so_LDFLAGS = -Wl,-z -Wl,initfirst -shared
 valgrind_so_LDADD = \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
 	demangle/safe-ctype.o
 
+vgskin_memcheck_so_SOURCES = \
+	vg_memcheck.c \
+	vg_memcheck_clientreqs.c \
+	vg_memcheck_errcontext.c \
+	vg_memcheck_from_ucode.c \
+	vg_memcheck_translate.c \
+	vg_memcheck_helpers.S
+vgskin_memcheck_so_LDFLAGS = -shared
+
+vgskin_cachesim_so_SOURCES = vg_cachesim.c
+vgskin_cachesim_so_LDFLAGS = -shared
+
+vgskin_eraser_so_SOURCES = vg_eraser.c
+vgskin_eraser_so_LDFLAGS = -shared
+
+vgskin_addrcheck_so_SOURCES = vg_addrcheck.c
+vgskin_addrcheck_so_LDFLAGS = -shared
+
+vgskin_none_so_SOURCES 	 = vg_none.c
+vgskin_none_so_LDFLAGS   = -shared
+
+vgskin_lackey_so_SOURCES = vg_lackey.c
+vgskin_lackey_so_LDFLAGS = -shared
+
+vgskin_corecheck_so_SOURCES = vg_corecheck.c
+vgskin_corecheck_so_LDFLAGS = -shared
+
 include_HEADERS = valgrind.h
 
 noinst_HEADERS = \
-        vg_cachesim_gen.c       \
-        vg_cachesim_I1.c        \
-        vg_cachesim_D1.c        \
-        vg_cachesim_L2.c        \
         vg_kerneliface.h        \
         vg_include.h            \
+        vg_skin.h               \
         vg_constants.h          \
-        vg_unsafe.h
+        vg_constants_skin.h     \
+        vg_unsafe.h		\
+	vg_memcheck_include.h	\
+	vg_memcheck.h
 
 MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) 
 
@@ -92,19 +137,40 @@
 vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS)
 	$(COMPILE) -fno-omit-frame-pointer -c $<
 
-valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
-	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
-		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
+##valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \
+##		$(valgrind_so_OBJECTS) $(valgrind_so_LDADD)
 
-valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
-	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
+##valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS)
 
-libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
-	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
-		$(libpthread_so_OBJECTS) \
-		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+##libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs
+##	$(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \
+##		$(libpthread_so_OBJECTS) \
+##		-Wl,-version-script $(srcdir)/vg_libpthread.vs
+
+##vgskin_memcheck.so$(EXEEXT): $(vgskin_memcheck_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_memcheck.so \
+##		$(vgskin_memcheck_so_OBJECTS)
+
+##vgskin_cachesim.so$(EXEEXT): $(vgskin_cachesim_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_cachesim.so \
+##		$(vgskin_cachesim_so_OBJECTS)
+
+##vgskin_eraser.so$(EXEEXT): $(vgskin_eraser_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_eraser.so \
+##		$(vgskin_eraser_so_OBJECTS)
+
+##vgskin_none.so$(EXEEXT): $(vgskin_none_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_none.so \
+##		$(vgskin_none_so_OBJECTS)
+
+##vgskin_lackey.so$(EXEEXT): $(vgskin_lackey_so_OBJECTS)
+##	$(CC) $(CFLAGS) $(LDFLAGS) -shared -o vgskin_lackey.so \
+##		$(vgskin_lackey_so_OBJECTS)
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(valdir)
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
+
diff --git a/none/nl_main.c b/none/nl_main.c
new file mode 100644
index 0000000..398b88e
--- /dev/null
+++ b/none/nl_main.c
@@ -0,0 +1,57 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The null skin.                                     vg_none.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track) 
+{
+   needs->name                    = "nulgrind";
+   needs->description             = "a binary JIT-compiler";
+
+   /* No needs, no core events to track */
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr a)
+{
+    return cb;
+}
+
+void SK_(fini)(void)
+{
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                vg_none.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
new file mode 100644
index 0000000..32c5a1b
--- /dev/null
+++ b/none/tests/Makefile.am
@@ -0,0 +1,45 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## These ones all work fine without producing errors in any skin.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	bitfield1 bt_everything bt_literal coolo_strlen \
+	cpuid dastest floored fork fucomip munmap_exe rcl_assert \
+	rcrl readline1 sha1_test shortpush shorts smc1 \
+	pth_blockedsig \
+	coolo_sigaction gxx304
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# generic C ones
+bitfield1_SOURCES 	= bitfield1.c
+bt_everything_SOURCES 	= bt_everything.c
+bt_literal_SOURCES 	= bt_literal.c
+cpuid_SOURCES 		= cpuid_c.c cpuid_s.s
+coolo_strlen_SOURCES 	= coolo_strlen.c
+dastest_SOURCES 	= dastest_c.c dastest_s.s
+fork_SOURCES 		= fork.c
+floored_SOURCES 	= floored.c
+floored_LDADD 		= -lm
+fucomip_SOURCES 	= fucomip.c
+munmap_exe_SOURCES 	= munmap_exe.c
+rcl_assert_SOURCES 	= rcl_assert.S
+rcrl_SOURCES 		= rcrl.c
+readline1_SOURCES 	= readline1.c
+smc1_SOURCES 		= smc1.c
+sha1_test_SOURCES 	= sha1_test.c
+shortpush_SOURCES 	= shortpush.c
+shorts_SOURCES 		= shorts.c
+
+# pthread C ones
+pth_blockedsig_SOURCES	= pth_blockedsig.c
+pth_blockedsig_LDADD	= -lpthread
+
+# generic C++ ones
+coolo_sigaction_SOURCES	= coolo_sigaction.cpp
+gxx304_SOURCES		= gxx304.cpp
+
+
diff --git a/none/tests/bitfield1.c b/none/tests/bitfield1.c
new file mode 100644
index 0000000..183c7e8
--- /dev/null
+++ b/none/tests/bitfield1.c
@@ -0,0 +1,19 @@
+
+#include <malloc.h>
+
+typedef
+   struct {
+      int          x;
+      unsigned int y:1;
+      int          z;
+   } 
+   Fooble;
+
+int main ( void )
+{
+   Fooble* f = malloc(sizeof(Fooble));
+   f->x = 1;
+   f->z = 1;
+   f->y = (f == (Fooble*)17 ? 1 : 0);
+   return 0;
+}
diff --git a/none/tests/bitfield1.stderr.exp b/none/tests/bitfield1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/bitfield1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/bitfield1.stderr.exp.hd b/none/tests/bitfield1.stderr.exp.hd
new file mode 100644
index 0000000..4f61f32
--- /dev/null
+++ b/none/tests/bitfield1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 12 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/bitfield1.vgtest b/none/tests/bitfield1.vgtest
new file mode 100644
index 0000000..88260d8
--- /dev/null
+++ b/none/tests/bitfield1.vgtest
@@ -0,0 +1 @@
+prog: bitfield1
diff --git a/tests/bt_everything.c b/none/tests/bt_everything.c
similarity index 100%
copy from tests/bt_everything.c
copy to none/tests/bt_everything.c
diff --git a/none/tests/bt_everything.stderr.exp b/none/tests/bt_everything.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/bt_everything.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/bt_everything.stderr.exp.hd b/none/tests/bt_everything.stderr.exp.hd
new file mode 100644
index 0000000..3a4f79b
--- /dev/null
+++ b/none/tests/bt_everything.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 200 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/bt_everything.stdout.exp b/none/tests/bt_everything.stdout.exp
new file mode 100644
index 0000000..fd67221
--- /dev/null
+++ b/none/tests/bt_everything.stdout.exp
@@ -0,0 +1,2 @@
+MEM-L: final res 0xd2bfea53, carrydep 0x5b80deee
+REG-L: final res 0x605d78ff, carrydep 0x7c0dc86a
diff --git a/none/tests/bt_everything.vgtest b/none/tests/bt_everything.vgtest
new file mode 100644
index 0000000..711210d
--- /dev/null
+++ b/none/tests/bt_everything.vgtest
@@ -0,0 +1 @@
+prog: bt_everything
diff --git a/tests/bt_literal.c b/none/tests/bt_literal.c
similarity index 100%
copy from tests/bt_literal.c
copy to none/tests/bt_literal.c
diff --git a/none/tests/bt_literal.stderr.exp b/none/tests/bt_literal.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/bt_literal.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/bt_literal.stderr.exp.hd b/none/tests/bt_literal.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/bt_literal.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/bt_literal.stdout.exp b/none/tests/bt_literal.stdout.exp
new file mode 100644
index 0000000..c9bb28d
--- /dev/null
+++ b/none/tests/bt_literal.stdout.exp
@@ -0,0 +1,16 @@
+0x0 -> 0x a 0x a 0x a
+0x1 -> 0x1b 0x1b 0x1b
+0x2 -> 0x2a 0x2a 0x2a
+0x3 -> 0x3b 0x3b 0x3b
+0x4 -> 0x4a 0x4a 0x4a
+0x5 -> 0x5b 0x5b 0x5b
+0x6 -> 0x6a 0x6a 0x6a
+0x7 -> 0x7b 0x7b 0x7b
+0x8 -> 0x82 0x82 0x82
+0x9 -> 0x93 0x93 0x93
+0xa -> 0xa2 0xa2 0xa2
+0xb -> 0xb3 0xb3 0xb3
+0xc -> 0xc2 0xc2 0xc2
+0xd -> 0xd3 0xd3 0xd3
+0xe -> 0xe2 0xe2 0xe2
+0xf -> 0xf3 0xf3 0xf3
diff --git a/none/tests/bt_literal.vgtest b/none/tests/bt_literal.vgtest
new file mode 100644
index 0000000..9c06c64
--- /dev/null
+++ b/none/tests/bt_literal.vgtest
@@ -0,0 +1 @@
+prog: bt_literal
diff --git a/tests/coolo_sigaction.cpp b/none/tests/coolo_sigaction.cpp
similarity index 100%
copy from tests/coolo_sigaction.cpp
copy to none/tests/coolo_sigaction.cpp
diff --git a/none/tests/coolo_sigaction.stderr.exp b/none/tests/coolo_sigaction.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/coolo_sigaction.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/coolo_sigaction.stderr.exp.hd b/none/tests/coolo_sigaction.stderr.exp.hd
new file mode 100644
index 0000000..564abd8
--- /dev/null
+++ b/none/tests/coolo_sigaction.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 372 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/coolo_sigaction.stdout.exp b/none/tests/coolo_sigaction.stdout.exp
new file mode 100644
index 0000000..652c76e
--- /dev/null
+++ b/none/tests/coolo_sigaction.stdout.exp
@@ -0,0 +1 @@
+handled 17
diff --git a/none/tests/coolo_sigaction.vgtest b/none/tests/coolo_sigaction.vgtest
new file mode 100644
index 0000000..4ee1d82
--- /dev/null
+++ b/none/tests/coolo_sigaction.vgtest
@@ -0,0 +1 @@
+prog: coolo_sigaction
diff --git a/tests/coolo_strlen.c b/none/tests/coolo_strlen.c
similarity index 100%
copy from tests/coolo_strlen.c
copy to none/tests/coolo_strlen.c
diff --git a/none/tests/coolo_strlen.stderr.exp b/none/tests/coolo_strlen.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/coolo_strlen.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/coolo_strlen.stderr.exp.hd b/none/tests/coolo_strlen.stderr.exp.hd
new file mode 100644
index 0000000..5164e75
--- /dev/null
+++ b/none/tests/coolo_strlen.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 33 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 33 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/coolo_strlen.vgtest b/none/tests/coolo_strlen.vgtest
new file mode 100644
index 0000000..b49ad79
--- /dev/null
+++ b/none/tests/coolo_strlen.vgtest
@@ -0,0 +1 @@
+prog: coolo_strlen
diff --git a/none/tests/cpuid.stderr.exp b/none/tests/cpuid.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/cpuid.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/cpuid.stderr.exp.hd b/none/tests/cpuid.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/cpuid.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/cpuid.stdout.exp b/none/tests/cpuid.stdout.exp
new file mode 100644
index 0000000..ab872bf
--- /dev/null
+++ b/none/tests/cpuid.stdout.exp
@@ -0,0 +1,2 @@
+cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69
+cpuid words (1): 0x52b 0x0 0x0 0x1bf
diff --git a/none/tests/cpuid.vgtest b/none/tests/cpuid.vgtest
new file mode 100644
index 0000000..36a2db0
--- /dev/null
+++ b/none/tests/cpuid.vgtest
@@ -0,0 +1 @@
+prog: cpuid
diff --git a/tests/cpuid_c.c b/none/tests/cpuid_c.c
similarity index 100%
copy from tests/cpuid_c.c
copy to none/tests/cpuid_c.c
diff --git a/tests/cpuid_s.s b/none/tests/cpuid_s.s
similarity index 100%
copy from tests/cpuid_s.s
copy to none/tests/cpuid_s.s
diff --git a/none/tests/dastest.stderr.exp b/none/tests/dastest.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/dastest.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/dastest.stderr.exp.hd b/none/tests/dastest.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/dastest.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/dastest.stdout.exp b/none/tests/dastest.stdout.exp
new file mode 100644
index 0000000..a122b1e
--- /dev/null
+++ b/none/tests/dastest.stdout.exp
@@ -0,0 +1,2 @@
+dastest: x = 49
+dastest: das(x) = 49
diff --git a/none/tests/dastest.vgtest b/none/tests/dastest.vgtest
new file mode 100644
index 0000000..91f9033
--- /dev/null
+++ b/none/tests/dastest.vgtest
@@ -0,0 +1 @@
+prog: dastest
diff --git a/tests/dastest_c.c b/none/tests/dastest_c.c
similarity index 100%
copy from tests/dastest_c.c
copy to none/tests/dastest_c.c
diff --git a/tests/dastest.s b/none/tests/dastest_s.s
similarity index 100%
copy from tests/dastest.s
copy to none/tests/dastest_s.s
diff --git a/none/tests/filter_stderr b/none/tests/filter_stderr
new file mode 100755
index 0000000..07d877d
--- /dev/null
+++ b/none/tests/filter_stderr
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+../filter_stderr_basic
diff --git a/none/tests/floored.c b/none/tests/floored.c
new file mode 100644
index 0000000..678a4f5
--- /dev/null
+++ b/none/tests/floored.c
@@ -0,0 +1,18 @@
+
+#include <math.h>
+#include <stdio.h>
+
+int xToI ( );
+
+int main ( void )
+{
+   printf ( "the answer is %d\n", xToI () );
+   return 0;
+}
+
+
+int xToI()
+{
+    return (int)floor(2.90) + 1;
+}
+
diff --git a/none/tests/floored.stderr.exp b/none/tests/floored.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/floored.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/floored.stderr.exp.hd b/none/tests/floored.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/floored.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/floored.stdout.exp b/none/tests/floored.stdout.exp
new file mode 100644
index 0000000..a8bb84d
--- /dev/null
+++ b/none/tests/floored.stdout.exp
@@ -0,0 +1 @@
+the answer is 3
diff --git a/none/tests/floored.vgtest b/none/tests/floored.vgtest
new file mode 100644
index 0000000..2e22206
--- /dev/null
+++ b/none/tests/floored.vgtest
@@ -0,0 +1 @@
+prog: floored
diff --git a/none/tests/fork.c b/none/tests/fork.c
new file mode 100644
index 0000000..2a987d2
--- /dev/null
+++ b/none/tests/fork.c
@@ -0,0 +1,15 @@
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <stdio.h>
+
+int main(void)
+{
+  pid_t pid;
+
+  pid = fork ();
+
+  printf("my pid is %s\n", pid==0 ? "zero" : "non-zero");
+
+  return 0;
+}
diff --git a/none/tests/fork.stderr.exp b/none/tests/fork.stderr.exp
new file mode 100644
index 0000000..b28b04f
--- /dev/null
+++ b/none/tests/fork.stderr.exp
@@ -0,0 +1,3 @@
+
+
+
diff --git a/none/tests/fork.stderr.exp.hd b/none/tests/fork.stderr.exp.hd
new file mode 100644
index 0000000..f1512e4
--- /dev/null
+++ b/none/tests/fork.stderr.exp.hd
@@ -0,0 +1,13 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/fork.stdout.exp b/none/tests/fork.stdout.exp
new file mode 100644
index 0000000..d3f09d4
--- /dev/null
+++ b/none/tests/fork.stdout.exp
@@ -0,0 +1,2 @@
+my pid is non-zero
+my pid is zero
diff --git a/none/tests/fork.vgtest b/none/tests/fork.vgtest
new file mode 100644
index 0000000..0de247f
--- /dev/null
+++ b/none/tests/fork.vgtest
@@ -0,0 +1 @@
+prog: fork
diff --git a/tests/fucomip.c b/none/tests/fucomip.c
similarity index 100%
copy from tests/fucomip.c
copy to none/tests/fucomip.c
diff --git a/none/tests/fucomip.stderr.exp b/none/tests/fucomip.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/fucomip.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/fucomip.stderr.exp.hd b/none/tests/fucomip.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/fucomip.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/fucomip.vgtest b/none/tests/fucomip.vgtest
new file mode 100644
index 0000000..6755f13
--- /dev/null
+++ b/none/tests/fucomip.vgtest
@@ -0,0 +1 @@
+prog: fucomip
diff --git a/tests/gxx304.cpp b/none/tests/gxx304.cpp
similarity index 100%
copy from tests/gxx304.cpp
copy to none/tests/gxx304.cpp
diff --git a/none/tests/gxx304.stderr.exp b/none/tests/gxx304.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/gxx304.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/gxx304.stderr.exp.hd b/none/tests/gxx304.stderr.exp.hd
new file mode 100644
index 0000000..4944bba
--- /dev/null
+++ b/none/tests/gxx304.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 24 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/gxx304.vgtest b/none/tests/gxx304.vgtest
new file mode 100644
index 0000000..202a58a
--- /dev/null
+++ b/none/tests/gxx304.vgtest
@@ -0,0 +1 @@
+prog: gxx304
diff --git a/none/tests/munmap_exe.c b/none/tests/munmap_exe.c
new file mode 100644
index 0000000..e17d885
--- /dev/null
+++ b/none/tests/munmap_exe.c
@@ -0,0 +1,24 @@
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Point of this is that the fd of an PROT_EXEC segment is -1, so Valgrind
+   shouldn't add it to its list of exe segs, and thus it won't be discarded
+   upon the munmap() (so no "discard" message). */
+
+int main()
+{
+    void* m;
+    
+    m = mmap(NULL, 100, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+    if (m == (void*)-1) {
+       fprintf(stderr, "error mmapping\n");
+       exit(1);
+    }
+    
+    munmap(m, 100);
+
+    return 0;
+}
diff --git a/none/tests/munmap_exe.stderr.exp b/none/tests/munmap_exe.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/munmap_exe.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/munmap_exe.stderr.exp.hd b/none/tests/munmap_exe.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/munmap_exe.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/munmap_exe.vgtest b/none/tests/munmap_exe.vgtest
new file mode 100644
index 0000000..8409a03
--- /dev/null
+++ b/none/tests/munmap_exe.vgtest
@@ -0,0 +1 @@
+prog: munmap_exe
diff --git a/none/tests/pth_blockedsig.c b/none/tests/pth_blockedsig.c
new file mode 100644
index 0000000..65fe7ba
--- /dev/null
+++ b/none/tests/pth_blockedsig.c
@@ -0,0 +1,66 @@
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <pthread.h>
+
+static void sig_usr1(int);
+
+static pthread_t main_thread;
+
+void *
+child_main(void *no_args)
+{
+//  int i;
+  
+// Only do it once, to shorten test --njn
+//  for (i = 0; i < 5; ++i)
+//    {
+      sleep (1);
+      fprintf (stdout, "thread %ld sending SIGUSR1 to thread %ld\n",
+               pthread_self (), main_thread);
+      if (pthread_kill (main_thread, SIGUSR1) != 0)
+        fprintf (stderr, "error doing pthread_kill\n"); 
+//    }
+
+  return no_args;
+}
+
+int
+main(void)
+{
+  struct sigaction sigact;
+  sigset_t newmask, oldmask;
+  pthread_t child;
+
+  memset(&newmask, 0, sizeof newmask);
+  sigemptyset (&newmask);
+  sigaddset (&newmask, SIGUSR1);
+
+  if (pthread_sigmask (SIG_BLOCK, &newmask, &oldmask) != 0)
+    fprintf (stderr, "SIG_BLOCK error");
+  
+  memset (&sigact, 0, sizeof sigact);
+  sigact.sa_handler = sig_usr1;
+  if (sigaction(SIGUSR1, &sigact, NULL) != 0)
+    fprintf (stderr, "signal(SIGINT) error");
+  
+  main_thread = pthread_self ();
+  if (pthread_create (&child, NULL, child_main, NULL) != 0)
+    fprintf (stderr, "error creating thread");
+
+  pthread_join (child, NULL);
+  
+  exit(0);
+}
+
+static void
+sig_usr1 (int signo)
+{
+  fprintf (stderr, "SHOULD NOT BE HERE (SIGUSR1)!!!!\n");
+  return;
+}
+
+
diff --git a/none/tests/pth_blockedsig.stderr.exp b/none/tests/pth_blockedsig.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/pth_blockedsig.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/pth_blockedsig.stderr.exp.hd b/none/tests/pth_blockedsig.stderr.exp.hd
new file mode 100644
index 0000000..dc3cc2b
--- /dev/null
+++ b/none/tests/pth_blockedsig.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/pth_blockedsig.stdout.exp b/none/tests/pth_blockedsig.stdout.exp
new file mode 100644
index 0000000..be7b259
--- /dev/null
+++ b/none/tests/pth_blockedsig.stdout.exp
@@ -0,0 +1 @@
+thread 2 sending SIGUSR1 to thread 1
diff --git a/none/tests/pth_blockedsig.vgtest b/none/tests/pth_blockedsig.vgtest
new file mode 100644
index 0000000..4532980
--- /dev/null
+++ b/none/tests/pth_blockedsig.vgtest
@@ -0,0 +1 @@
+prog: pth_blockedsig
diff --git a/tests/pth_specific.c b/none/tests/pth_specific.c
similarity index 100%
copy from tests/pth_specific.c
copy to none/tests/pth_specific.c
diff --git a/tests/rcl_assert.s b/none/tests/rcl_assert.S
similarity index 100%
copy from tests/rcl_assert.s
copy to none/tests/rcl_assert.S
diff --git a/none/tests/rcl_assert.stderr.exp b/none/tests/rcl_assert.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/rcl_assert.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/rcl_assert.stderr.exp.hd b/none/tests/rcl_assert.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/rcl_assert.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/rcl_assert.vgtest b/none/tests/rcl_assert.vgtest
new file mode 100644
index 0000000..0355bfd
--- /dev/null
+++ b/none/tests/rcl_assert.vgtest
@@ -0,0 +1 @@
+prog: rcl_assert
diff --git a/tests/rcrl.c b/none/tests/rcrl.c
similarity index 100%
copy from tests/rcrl.c
copy to none/tests/rcrl.c
diff --git a/none/tests/rcrl.stderr.exp b/none/tests/rcrl.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/rcrl.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/rcrl.stderr.exp.hd b/none/tests/rcrl.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/rcrl.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/rcrl.stdout.exp b/none/tests/rcrl.stdout.exp
new file mode 100644
index 0000000..355f303
--- /dev/null
+++ b/none/tests/rcrl.stdout.exp
@@ -0,0 +1 @@
+x = 0.999939
diff --git a/none/tests/rcrl.vgtest b/none/tests/rcrl.vgtest
new file mode 100644
index 0000000..ea1b8ba
--- /dev/null
+++ b/none/tests/rcrl.vgtest
@@ -0,0 +1 @@
+prog: rcrl
diff --git a/none/tests/readline1.c b/none/tests/readline1.c
new file mode 100644
index 0000000..63c4b89
--- /dev/null
+++ b/none/tests/readline1.c
@@ -0,0 +1,27 @@
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+
+int rl_insert ( int, int );
+
+int main ( void )
+{
+   rl_insert(1, 'z');
+
+   return 0;
+}
+
+int zzzstrlen ( char* str )
+{
+   if (str[1] == 0) return 2; else return 10;
+}
+
+int rl_insert ( int count, int c )
+{
+   char str[2];
+   str[1] = 0;
+   str[0] = c;
+   printf("HERE strlen  is %d\n", zzzstrlen(str));
+   return 0;
+}
diff --git a/none/tests/readline1.stderr.exp b/none/tests/readline1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/readline1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/readline1.stderr.exp.hd b/none/tests/readline1.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/readline1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/readline1.stdout.exp b/none/tests/readline1.stdout.exp
new file mode 100644
index 0000000..bee59c7
--- /dev/null
+++ b/none/tests/readline1.stdout.exp
@@ -0,0 +1 @@
+HERE strlen  is 2
diff --git a/none/tests/readline1.vgtest b/none/tests/readline1.vgtest
new file mode 100644
index 0000000..cbfd47a
--- /dev/null
+++ b/none/tests/readline1.vgtest
@@ -0,0 +1 @@
+prog: readline1
diff --git a/tests/sha1.test.c b/none/tests/sha1_test.c
similarity index 100%
copy from tests/sha1.test.c
copy to none/tests/sha1_test.c
diff --git a/none/tests/sha1_test.stderr.exp b/none/tests/sha1_test.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/sha1_test.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/sha1_test.stderr.exp.hd b/none/tests/sha1_test.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/sha1_test.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/sha1_test.vgtest b/none/tests/sha1_test.vgtest
new file mode 100644
index 0000000..eb20557
--- /dev/null
+++ b/none/tests/sha1_test.vgtest
@@ -0,0 +1 @@
+prog: sha1_test
diff --git a/tests/shortpush.c b/none/tests/shortpush.c
similarity index 100%
copy from tests/shortpush.c
copy to none/tests/shortpush.c
diff --git a/none/tests/shortpush.stderr.exp b/none/tests/shortpush.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/shortpush.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/shortpush.stderr.exp.hd b/none/tests/shortpush.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/shortpush.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/shortpush.vgtest b/none/tests/shortpush.vgtest
new file mode 100644
index 0000000..7fc35ef
--- /dev/null
+++ b/none/tests/shortpush.vgtest
@@ -0,0 +1 @@
+prog: shortpush
diff --git a/tests/shorts.c b/none/tests/shorts.c
similarity index 100%
copy from tests/shorts.c
copy to none/tests/shorts.c
diff --git a/none/tests/shorts.stderr.exp b/none/tests/shorts.stderr.exp
new file mode 100644
index 0000000..328e795
--- /dev/null
+++ b/none/tests/shorts.stderr.exp
@@ -0,0 +1,4 @@
+
+case2
+case4
+
diff --git a/none/tests/shorts.stderr.exp.hd b/none/tests/shorts.stderr.exp.hd
new file mode 100644
index 0000000..a1f5070
--- /dev/null
+++ b/none/tests/shorts.stderr.exp.hd
@@ -0,0 +1,9 @@
+
+case2
+case4
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/shorts.vgtest b/none/tests/shorts.vgtest
new file mode 100644
index 0000000..af03ee0
--- /dev/null
+++ b/none/tests/shorts.vgtest
@@ -0,0 +1 @@
+prog: shorts
diff --git a/none/tests/smc1.c b/none/tests/smc1.c
new file mode 100644
index 0000000..0b0ebdf
--- /dev/null
+++ b/none/tests/smc1.c
@@ -0,0 +1,73 @@
+
+/* Test Heimdall's ability to spot writes to code which has been
+   translated, and discard the out-of-date translations.
+
+   CORRECT output is
+
+      in p 0
+      in q 1
+      in p 2
+      in q 3
+      in p 4
+      in q 5
+      in p 6
+      in q 7
+      in p 8
+      in q 9
+
+  WRONG output (if you fail to spot code-writes to code[0 .. 4]) is
+
+      in p 0
+      in p 1
+      in p 2
+      in p 3
+      in p 4
+      in p 5
+      in p 6
+      in p 7
+      in p 8
+      in p 9
+*/
+
+#include <stdio.h>
+
+typedef unsigned int Addr;
+typedef unsigned char UChar;
+
+void q ( int n )
+{
+   printf("in q %d\n", n);
+}
+
+void p ( int n )
+{
+   printf("in p %d\n", n);
+}
+
+UChar code[100];
+
+/* Make `code' be JMP-32 dest */
+void set_dest ( Addr dest )
+{
+   unsigned int delta;
+   delta = dest - ((Addr)(&code[0]));
+   delta -= 5;
+   
+   code[0] = 0xE9;   /* JMP d32 */
+   code[1] = (delta & 0xFF);
+   code[2] = ((delta >> 8) & 0xFF);
+   code[3] = ((delta >> 16) & 0xFF);
+   code[4] = ((delta >> 24) & 0xFF);
+}
+
+int main ( void )
+{
+   int i;
+   for (i = 0; i < 10; i += 2) {
+      set_dest ( (Addr)&p );
+      (  (void (*)(int)) (&code[0])  ) (i);
+      set_dest ( (Addr)&q );
+      (  (void (*)(int)) (&code[0])  ) (i+1);
+   }
+   return 0;
+}
diff --git a/none/tests/smc1.stderr.exp b/none/tests/smc1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/none/tests/smc1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/none/tests/smc1.stderr.exp.hd b/none/tests/smc1.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/none/tests/smc1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/none/tests/smc1.stdout.exp b/none/tests/smc1.stdout.exp
new file mode 100644
index 0000000..d7fc032
--- /dev/null
+++ b/none/tests/smc1.stdout.exp
@@ -0,0 +1,10 @@
+in p 0
+in p 1
+in p 2
+in p 3
+in p 4
+in p 5
+in p 6
+in p 7
+in p 8
+in p 9
diff --git a/none/tests/smc1.vgtest b/none/tests/smc1.vgtest
new file mode 100644
index 0000000..e2ef32c
--- /dev/null
+++ b/none/tests/smc1.vgtest
@@ -0,0 +1 @@
+prog: smc1
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e25f859..a505f11 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,34 +1,4 @@
-EXTRA_DIST = \
-	badaddrvalue.c badjump.c \
-	badloop.c bitfield1.c \
-	blocked_syscall.c clientperm.c \
-	clientstackperm.c coolo_sigaction.cpp \
-	coolo_strlen.c coolo_strlen.s \
-	cpuid_c.c cpuid_s.s \
-	doublefree.c errs1.c \
-	exitprog.c floored.c \
-	fprw.c fwrite.c \
-	inline.c inlineh.c \
-	inlineh.h malloc1.c \
-	malloc2.c manuel1.c \
-	manuel2.c manuel3.c \
-	memalign_test.c memcmptest.c \
-	memtests.cpp mmaptest.c \
-	oneparam.c pushfpopf_c.c \
-	pushfpopf.s rcl_assert.s \
-	rcrl.c readline1.c \
-	realloc1.c sha1.test.c \
-	shortpush.c shorts.c \
-	signal1.c signal2.c \
-	signal3.c smc1.c \
-	suppfree.c tronical.c \
-	tronical.s twoparams.c \
-	twoparams.s \
-	pth_cvsimple.c pth_simple_threads.c pth_simple_mutex.c \
-	bt_everything.c bt_literal.c \
-	pth_threadpool.c pth_specific.c pth_mutexspeed.c malloc3.c \
-	pth_once.c weirdioctl.c pth_signal1.c pth_signal2.c \
-	discard.c pth_semaphore1.c new_override.cpp pth_yield.c \
-	sigaltstack.c erringfds.c sigwait_all.c \
-	pth_cancel1.c pth_cancel2.c pth_signal_gober.c nanoleak.c \
-	pth_pause.c pth_sigpending.c pth_atfork1.c resolv.c
+## Process this file with automake to produce Makefile.in
+
+SUBDIRS = cachesim corecheck memcheck none # eraser lackey
+
diff --git a/tests/bitfield1.c b/tests/bitfield1.c
deleted file mode 100644
index 4a7a61a..0000000
--- a/tests/bitfield1.c
+++ /dev/null
@@ -1,18 +0,0 @@
-
-#include <malloc.h>
-
-typedef
-   struct {
-      int          x;
-      unsigned int y:1;
-      int          z;
-   } 
-   Fooble;
-
-void main ( void )
-{
-   Fooble* f = malloc(sizeof(Fooble));
-   f->x = 1;
-   f->z = 1;
-   f->y = (f == (Fooble*)17 ? 1 : 0);
-}
diff --git a/tests/cachesim/.cvsignore b/tests/cachesim/.cvsignore
new file mode 100644
index 0000000..b1e9802
--- /dev/null
+++ b/tests/cachesim/.cvsignore
@@ -0,0 +1,9 @@
+Makefile.in
+Makefile
+dlclose
+fpu-28-108
+cachegrind.out.*
+*.stdout.diff
+*.stderr.diff
+*.stdout.out
+*.stderr.out
diff --git a/tests/cachesim/Makefile.am b/tests/cachesim/Makefile.am
new file mode 100644
index 0000000..f4e0f44
--- /dev/null
+++ b/tests/cachesim/Makefile.am
@@ -0,0 +1,25 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## Cachegrind ones.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	dlclose fpu-28-108 myprint.so
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS) 
+
+# C ones
+dlclose_SOURCES		= dlclose.c
+dlclose_LDADD		= -ldl
+myprint_so_SOURCES	= myprint.c
+myprint_so_LDFLAGS	= -shared
+
+fpu_28_108_SOURCES	= fpu-28-108.S
+
+##myprint.so$(EXEEXT): $(myprint_so_OBJECTS)
+##	$(CC) $(CFLAGS) -shared -o myprint.so $(myprint_so_OBJECTS)
+
+
+
diff --git a/tests/cachesim/dlclose.c b/tests/cachesim/dlclose.c
new file mode 100644
index 0000000..9fee030
--- /dev/null
+++ b/tests/cachesim/dlclose.c
@@ -0,0 +1,38 @@
+/* This exercises the code that was causing this bug:
+  
+     valgrind: vg_cachesim.c:389 (get_BBCC): Assertion `((Bool)0) == remove' 
+     failed.
+
+   in Cachegrind 1.0.0 and 1.0.1, that was caused by unloading symbols before
+   invalidating translations.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+int main(int argc, char **argv) {
+   void *handle;
+   void (*myprint)(void);
+   char *error;
+
+   handle = dlopen ("./myprint.so", RTLD_LAZY);
+   if (!handle) {
+       fputs (dlerror(), stderr);
+       exit(1);
+   }
+
+   myprint = dlsym(handle, "myprint");
+   if ((error = dlerror()) != NULL)  {
+       fprintf (stderr, "%s\n", error);
+       exit(1);
+   }
+
+   (*myprint)();
+
+   /* Assertion failure was happening here */
+   dlclose(handle);
+
+   return 0;
+}
+
diff --git a/tests/cachesim/dlclose.stderr.exp b/tests/cachesim/dlclose.stderr.exp
new file mode 100644
index 0000000..89483cd
--- /dev/null
+++ b/tests/cachesim/dlclose.stderr.exp
@@ -0,0 +1,19 @@
+
+discard ... (... -> ...) translations in range 0x........ .. 0x........
+discard syms in /.../tests/cachesim/myprint.so due to munmap()
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/tests/cachesim/dlclose.stderr.exp.hd b/tests/cachesim/dlclose.stderr.exp.hd
new file mode 100644
index 0000000..89483cd
--- /dev/null
+++ b/tests/cachesim/dlclose.stderr.exp.hd
@@ -0,0 +1,19 @@
+
+discard ... (... -> ...) translations in range 0x........ .. 0x........
+discard syms in /.../tests/cachesim/myprint.so due to munmap()
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/tests/cachesim/dlclose.stdout.exp b/tests/cachesim/dlclose.stdout.exp
new file mode 100644
index 0000000..890082f
--- /dev/null
+++ b/tests/cachesim/dlclose.stdout.exp
@@ -0,0 +1 @@
+This is myprint!
diff --git a/tests/cachesim/dlclose.vgtest b/tests/cachesim/dlclose.vgtest
new file mode 100644
index 0000000..e014f34
--- /dev/null
+++ b/tests/cachesim/dlclose.vgtest
@@ -0,0 +1,3 @@
+vgopts.hd: --cachesim=yes
+prog: dlclose
+stderr_filter: filter_cachesim_discards
diff --git a/tests/cachesim/filter_cachesim_discards b/tests/cachesim/filter_cachesim_discards
new file mode 100755
index 0000000..a4f6732
--- /dev/null
+++ b/tests/cachesim/filter_cachesim_discards
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/filter_stderr | $dir/../filter_discards
diff --git a/tests/cachesim/filter_stderr b/tests/cachesim/filter_stderr
new file mode 100755
index 0000000..c33214c
--- /dev/null
+++ b/tests/cachesim/filter_stderr
@@ -0,0 +1,12 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic                         |
+
+# Remove numbers from I/D/L2 "refs:" lines
+sed "s/\(\(I\|D\|L2\) *refs:\)[ 0-9,()+rdw]*$/\1/"  |
+
+# Remove numbers from I1/D1/L2/L2i/L2d "misses:" and "miss rates:" lines
+sed "s/\(\(I1\|D1\|L2\|L2i\|L2d\) *\(misses\|miss rate\):\)[ 0-9,()+rdw%\.]*$/\1/" 
+
diff --git a/tests/cachesim/fpu-28-108.S b/tests/cachesim/fpu-28-108.S
new file mode 100644
index 0000000..f655c00
--- /dev/null
+++ b/tests/cachesim/fpu-28-108.S
@@ -0,0 +1,24 @@
+/* Test 28 and 108 byte loads and stores.  (Just make sure program
+   runs without any assertion failures from V.) */
+
+/* Useful listing: 
+	gcc -o tests/fpu_28_108 tests/fpu_28_108.S -Wa,-a */
+
+.data
+fooble:
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+bar:
+        
+.text
+.globl main
+main:
+        fstsw   fooble
+        fsave   fooble
+        frstor  fooble
+        fstenv  fooble
+        fldenv  fooble
+        movl    $0, %eax
+        ret
+
diff --git a/tests/cachesim/fpu-28-108.stderr.exp b/tests/cachesim/fpu-28-108.stderr.exp
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/tests/cachesim/fpu-28-108.stderr.exp
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/tests/cachesim/fpu-28-108.stderr.exp.hd b/tests/cachesim/fpu-28-108.stderr.exp.hd
new file mode 100644
index 0000000..8eaf654
--- /dev/null
+++ b/tests/cachesim/fpu-28-108.stderr.exp.hd
@@ -0,0 +1,17 @@
+
+
+I   refs:
+I1  misses:
+L2i misses:
+I1  miss rate:
+L2i miss rate:
+
+D   refs:
+D1  misses:
+L2d misses:
+D1  miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
diff --git a/tests/cachesim/fpu-28-108.vgtest b/tests/cachesim/fpu-28-108.vgtest
new file mode 100644
index 0000000..42d57a3
--- /dev/null
+++ b/tests/cachesim/fpu-28-108.vgtest
@@ -0,0 +1,2 @@
+vgopts.hd: --cachesim=yes
+prog: fpu-28-108
diff --git a/tests/cachesim/myprint.c b/tests/cachesim/myprint.c
new file mode 100644
index 0000000..e22ae87
--- /dev/null
+++ b/tests/cachesim/myprint.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+void myprint(void)
+{
+   puts("This is myprint!");
+}
diff --git a/tests/coolo_strlen.s b/tests/coolo_strlen.s
deleted file mode 100644
index 6cadfcc..0000000
--- a/tests/coolo_strlen.s
+++ /dev/null
@@ -1,90 +0,0 @@
-	.file	"coolo_strlen.c"
-	.version	"01.01"
-gcc2_compiled.:
-.section	.rodata
-.LC0:
-	.string	"HALLO"
-.globl memset
-.LC1:
-	.string	"THis is a very long strings"
-.text
-	.align 4
-.globl main
-	.type	 main,@function
-main:
-	movl .LC0,%eax
-	pushl %ebp
-	movl %esp,%ebp
-	subl $216,%esp
-	movl %eax,-200(%ebp)
-	movw .LC0+4,%ax
-	movw %ax,-196(%ebp)
-	leal -194(%ebp),%eax
-	addl $-4,%esp
-	pushl $194
-	pushl $0
-	pushl %eax
-	call memset
-	addl $16,%esp
-	addl $-12,%esp
-	addl $-8,%esp
-	pushl $.LC1
-	leal -200(%ebp),%eax
-	pushl %eax
-	call strcat
-	addl $16,%esp
-	pushl %eax
-	call __strdup
-	movl %eax,%edx
-	movl %edx,%ecx
-	andl $3,%ecx
-	je .L105
-	jp .L110
-	cmpl $2,%ecx
-	je .L111
-	cmpb %ch,(%eax)
-	je .L109
-	incl %eax
-.L111:
-	cmpb %ch,(%eax)
-	je .L109
-	incl %eax
-.L110:
-	cmpb %ch,(%eax)
-	je .L109
-	incl %eax
-.L105:
-	movl (%eax),%ecx
-	testb %ch,%cl
-	jne .L106
-	testb %cl,%cl
-	je .L109
-	testb %ch,%ch
-	je .L108
-.L106:
-	testl $16711680,%ecx
-	je .L107
-	addl $4,%eax
-	testl $-16777216,%ecx
-	jne .L105
-	subl $3,%eax
-.L107:
-	incl %eax
-.L108:
-	incl %eax
-.L109:
-	subl %edx,%eax
-	cmpl $11,%eax
-	jle .L102
-	movl $1,%eax
-	jmp .L104
-	.p2align 4,,7
-.L102:
-	xorl %eax,%eax
-.L104:
-	movl %ebp,%esp
-	popl %ebp
-	ret
-.Lfe1:
-	.size	 main,.Lfe1-main
-	.ident	"GCC: (GNU) 2.95.3 20010315 (release)"
diff --git a/tests/corecheck/.cvsignore b/tests/corecheck/.cvsignore
new file mode 100644
index 0000000..923e2f8
--- /dev/null
+++ b/tests/corecheck/.cvsignore
@@ -0,0 +1,10 @@
+Makefile.in
+Makefile
+erringfds
+malloc3
+sigkill
+pth_empty
+*.stdout.diff
+*.stderr.diff
+*.stdout.out
+*.stderr.out
diff --git a/tests/corecheck/Makefile.am b/tests/corecheck/Makefile.am
new file mode 100644
index 0000000..26e906e
--- /dev/null
+++ b/tests/corecheck/Makefile.am
@@ -0,0 +1,36 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## These test core error checking, eg. "silly values" for malloc/calloc,
+## pthread errors (and suppressions), signal handling errors, invalid fds for
+## blocking syscalls, etc.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	erringfds malloc3 sigkill \
+	pth_atfork1 pth_cancel2 pth_cvsimple pth_empty \
+	pth_mutexspeed pth_once
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# C ones
+erringfds_SOURCES 	= erringfds.c
+malloc3_SOURCES 	= malloc3.c
+sigkill_SOURCES 	= sigkill.c
+
+# Pthread ones
+pth_atfork1_SOURCES	= pth_atfork1.c
+pth_atfork1_LDADD	= -lpthread
+pth_cancel2_SOURCES	= pth_cancel2.c
+pth_cancel2_LDADD	= -lpthread
+pth_cvsimple_SOURCES	= pth_cvsimple.c
+pth_cvsimple_LDADD	= -lpthread
+pth_empty_SOURCES 	= pth_empty.c
+pth_empty_LDADD 	= -lpthread
+pth_mutexspeed_SOURCES	= pth_mutexspeed.c
+pth_mutexspeed_LDADD	= -lpthread
+pth_once_SOURCES	= pth_once.c
+pth_once_LDADD		= -lpthread
+
+
diff --git a/tests/erringfds.c b/tests/corecheck/erringfds.c
similarity index 100%
rename from tests/erringfds.c
rename to tests/corecheck/erringfds.c
diff --git a/tests/corecheck/erringfds.stderr.exp b/tests/corecheck/erringfds.stderr.exp
new file mode 100644
index 0000000..b6a487d
--- /dev/null
+++ b/tests/corecheck/erringfds.stderr.exp
@@ -0,0 +1,4 @@
+
+Warning: invalid file descriptor -1 in syscall read()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/tests/corecheck/erringfds.stderr.exp.hd b/tests/corecheck/erringfds.stderr.exp.hd
new file mode 100644
index 0000000..0516e09
--- /dev/null
+++ b/tests/corecheck/erringfds.stderr.exp.hd
@@ -0,0 +1,8 @@
+
+Warning: invalid file descriptor -1 in syscall read()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/erringfds.stdout.exp b/tests/corecheck/erringfds.stdout.exp
new file mode 100644
index 0000000..bcc1770
--- /dev/null
+++ b/tests/corecheck/erringfds.stdout.exp
@@ -0,0 +1,2 @@
+fd = -1
+n = -1
diff --git a/tests/corecheck/erringfds.vgtest b/tests/corecheck/erringfds.vgtest
new file mode 100644
index 0000000..5a8ede4
--- /dev/null
+++ b/tests/corecheck/erringfds.vgtest
@@ -0,0 +1 @@
+prog: erringfds
diff --git a/tests/corecheck/filter_stderr b/tests/corecheck/filter_stderr
new file mode 100755
index 0000000..31c5258
--- /dev/null
+++ b/tests/corecheck/filter_stderr
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic
diff --git a/tests/malloc3.c b/tests/corecheck/malloc3.c
similarity index 100%
rename from tests/malloc3.c
rename to tests/corecheck/malloc3.c
diff --git a/tests/corecheck/malloc3.stderr.exp b/tests/corecheck/malloc3.stderr.exp
new file mode 100644
index 0000000..97c1780
--- /dev/null
+++ b/tests/corecheck/malloc3.stderr.exp
@@ -0,0 +1,6 @@
+
+Warning: silly arg (-1) to malloc()
+Warning: silly args (0,-1) to calloc()
+Warning: silly args (-1,-1) to calloc()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/tests/corecheck/malloc3.stderr.exp.hd b/tests/corecheck/malloc3.stderr.exp.hd
new file mode 100644
index 0000000..9a908f3
--- /dev/null
+++ b/tests/corecheck/malloc3.stderr.exp.hd
@@ -0,0 +1,10 @@
+
+Warning: silly arg (-1) to malloc()
+Warning: silly args (0,-1) to calloc()
+Warning: silly args (-1,-1) to calloc()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 2 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/malloc3.stdout.exp b/tests/corecheck/malloc3.stdout.exp
new file mode 100644
index 0000000..681c9ec
--- /dev/null
+++ b/tests/corecheck/malloc3.stdout.exp
@@ -0,0 +1,5 @@
+malloc(0) = 0x........
+malloc(-1) = (nil)
+calloc(0,1) = 0x........
+calloc(0,-1) = (nil)
+calloc(-1,-1) = (nil)
diff --git a/tests/corecheck/malloc3.vgtest b/tests/corecheck/malloc3.vgtest
new file mode 100644
index 0000000..9feb8f0
--- /dev/null
+++ b/tests/corecheck/malloc3.vgtest
@@ -0,0 +1,2 @@
+prog: malloc3
+stdout_filter: ../filter_addresses
diff --git a/tests/pth_atfork1.c b/tests/corecheck/pth_atfork1.c
similarity index 100%
rename from tests/pth_atfork1.c
rename to tests/corecheck/pth_atfork1.c
diff --git a/tests/corecheck/pth_atfork1.stderr.exp b/tests/corecheck/pth_atfork1.stderr.exp
new file mode 100644
index 0000000..49f4fb4
--- /dev/null
+++ b/tests/corecheck/pth_atfork1.stderr.exp
@@ -0,0 +1,5 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_atfork1.stderr.exp.hd b/tests/corecheck/pth_atfork1.stderr.exp.hd
new file mode 100644
index 0000000..77357fb
--- /dev/null
+++ b/tests/corecheck/pth_atfork1.stderr.exp.hd
@@ -0,0 +1,13 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_atfork1.stdout.exp b/tests/corecheck/pth_atfork1.stdout.exp
new file mode 100644
index 0000000..089bcff
--- /dev/null
+++ b/tests/corecheck/pth_atfork1.stdout.exp
@@ -0,0 +1,4 @@
+prepare
+child
+prepare
+parent
diff --git a/tests/corecheck/pth_atfork1.vgtest b/tests/corecheck/pth_atfork1.vgtest
new file mode 100644
index 0000000..237ff87
--- /dev/null
+++ b/tests/corecheck/pth_atfork1.vgtest
@@ -0,0 +1 @@
+prog: pth_atfork1
diff --git a/tests/corecheck/pth_cancel2.c b/tests/corecheck/pth_cancel2.c
new file mode 100644
index 0000000..5bd7d0b
--- /dev/null
+++ b/tests/corecheck/pth_cancel2.c
@@ -0,0 +1,101 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ * async_safe --
+ *
+ * Example showing macro wrappers for calling non-async
+ * safe routines when the caller has asynchronous 
+ * cancellation turned on
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <pthread.h>
+
+
+#define async_cancel_safe_read(fd,buf,amt) \
+   { \
+      int oldtype; \
+      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
+      if (read(fd,buf,amt) < 0) \
+         perror("read"),exit(1); \
+      pthread_setcanceltype(oldtype,NULL); \
+      pthread_testcancel(); \
+   } 
+   
+
+#define async_cancel_safe_write(fd,buf,amt) \
+   { \
+      int oldtype; \
+      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
+      if (write(fd,buf,amt) < 0) \
+         perror("write"), exit(1); \
+      pthread_setcanceltype(oldtype,NULL); \
+      pthread_testcancel(); \
+   }
+
+
+static int fd;
+   
+void *io(void *arg)
+{
+   int *fd2=(int *)arg; 
+   char buf[20]="String";
+   int amt=20;
+
+   for (;;) {
+      async_cancel_safe_write(*fd2,buf,amt);
+      async_cancel_safe_read(*fd2,buf,amt);
+   }
+   return(NULL);
+}
+
+void *killer(void *arg)
+{ 
+   pthread_t * target = (pthread_t *)arg;
+   sleep(1);
+   pthread_cancel(*target);
+   return(NULL);
+}
+
+extern int
+main(void)
+{
+   pthread_t io_thread, killer_thread;   
+
+   extern void *io(void *);
+   extern void *killer(void  *);
+
+   if ((fd = open(".ktemp",O_CREAT | O_RDWR, 0666)) < 0)
+      perror("open"), exit(1);
+
+   pthread_create(&io_thread, 
+		  NULL,
+		  io,
+		  (void *)&fd);
+   pthread_create(&killer_thread,
+		  NULL,
+		  killer,
+		  (void *)&io_thread);
+
+   pthread_join(io_thread, NULL);
+
+   pthread_join(killer_thread,NULL);
+
+   if ((close(fd)) < 0)
+     perror("close"),exit(1);
+   if ((unlink(".ktemp")) < 0)
+     perror("unlink"),exit(1);
+
+   return 0;
+}
diff --git a/tests/corecheck/pth_cancel2.stderr.exp b/tests/corecheck/pth_cancel2.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/tests/corecheck/pth_cancel2.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_cancel2.stderr.exp.hd b/tests/corecheck/pth_cancel2.stderr.exp.hd
new file mode 100644
index 0000000..7dd5fed
--- /dev/null
+++ b/tests/corecheck/pth_cancel2.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 2 allocs, 2 frees, 24 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_cancel2.vgtest b/tests/corecheck/pth_cancel2.vgtest
new file mode 100644
index 0000000..7bb8e1b
--- /dev/null
+++ b/tests/corecheck/pth_cancel2.vgtest
@@ -0,0 +1 @@
+prog: pth_cancel2
diff --git a/tests/corecheck/pth_cvsimple.c b/tests/corecheck/pth_cvsimple.c
new file mode 100644
index 0000000..3bb5085
--- /dev/null
+++ b/tests/corecheck/pth_cvsimple.c
@@ -0,0 +1,83 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ *
+ * cvsimple.c
+ *
+ * Demonstrates pthread cancellation.
+ *
+ */
+
+#include <stdio.h>
+#include <pthread.h>
+
+#define NUM_THREADS  3
+#define TCOUNT 10
+#define COUNT_THRES 12
+
+int     count = 0;
+int     thread_ids[3] = {0,1,2};
+pthread_mutex_t count_lock=PTHREAD_MUTEX_INITIALIZER; 
+pthread_cond_t count_hit_threshold=PTHREAD_COND_INITIALIZER; 
+
+void *inc_count(void *idp)
+{
+  int i=0;
+  int *my_id = idp;
+
+  for (i=0; i<TCOUNT; i++) {
+    pthread_mutex_lock(&count_lock);
+    count++;
+    printf("inc_counter(): thread %d, count = %d, unlocking mutex\n", 
+	   *my_id, count);
+    if (count == COUNT_THRES) {
+      printf("inc_count(): Thread %d, count %d\n", *my_id, count);
+      pthread_cond_signal(&count_hit_threshold);
+    }
+    pthread_mutex_unlock(&count_lock);
+  }
+  
+  return(NULL);
+}
+
+void *watch_count(void *idp)
+{
+  int *my_id = idp;
+
+  printf("watch_count(): thread %d\n", *my_id);
+  fflush(stdout);
+  pthread_mutex_lock(&count_lock);
+
+  while (count < COUNT_THRES) {
+    pthread_cond_wait(&count_hit_threshold, &count_lock);
+    printf("watch_count(): thread %d, count %d\n", *my_id, count);
+  }
+
+  pthread_mutex_unlock(&count_lock);
+  
+  return(NULL);
+}
+
+extern int
+main(void)
+{
+  int       i;
+  pthread_t threads[3];
+
+  pthread_create(&threads[0], NULL, inc_count, (void *)&thread_ids[0]);
+  pthread_create(&threads[1], NULL, inc_count, (void *)&thread_ids[1]);
+  pthread_create(&threads[2], NULL, watch_count, (void *)&thread_ids[2]);
+
+  for (i = 0; i < NUM_THREADS; i++) {
+    pthread_join(threads[i], NULL);
+  }
+
+  return 0;
+}
+
+
diff --git a/tests/corecheck/pth_cvsimple.stderr.exp b/tests/corecheck/pth_cvsimple.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/tests/corecheck/pth_cvsimple.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_cvsimple.stderr.exp.hd b/tests/corecheck/pth_cvsimple.stderr.exp.hd
new file mode 100644
index 0000000..17aa2dd
--- /dev/null
+++ b/tests/corecheck/pth_cvsimple.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 3 allocs, 3 frees, 36 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_cvsimple.stdout.exp b/tests/corecheck/pth_cvsimple.stdout.exp
new file mode 100644
index 0000000..92dab19
--- /dev/null
+++ b/tests/corecheck/pth_cvsimple.stdout.exp
@@ -0,0 +1,22 @@
+inc_counter(): thread 0, count = 1, unlocking mutex
+inc_counter(): thread 0, count = 2, unlocking mutex
+inc_counter(): thread 0, count = 3, unlocking mutex
+inc_counter(): thread 0, count = 4, unlocking mutex
+inc_counter(): thread 0, count = 5, unlocking mutex
+inc_counter(): thread 0, count = 6, unlocking mutex
+inc_counter(): thread 0, count = 7, unlocking mutex
+inc_counter(): thread 0, count = 8, unlocking mutex
+inc_counter(): thread 0, count = 9, unlocking mutex
+inc_counter(): thread 0, count = 10, unlocking mutex
+inc_counter(): thread 1, count = 11, unlocking mutex
+inc_counter(): thread 1, count = 12, unlocking mutex
+inc_count(): Thread 1, count 12
+inc_counter(): thread 1, count = 13, unlocking mutex
+inc_counter(): thread 1, count = 14, unlocking mutex
+inc_counter(): thread 1, count = 15, unlocking mutex
+inc_counter(): thread 1, count = 16, unlocking mutex
+inc_counter(): thread 1, count = 17, unlocking mutex
+inc_counter(): thread 1, count = 18, unlocking mutex
+inc_counter(): thread 1, count = 19, unlocking mutex
+inc_counter(): thread 1, count = 20, unlocking mutex
+watch_count(): thread 2
diff --git a/tests/corecheck/pth_cvsimple.vgtest b/tests/corecheck/pth_cvsimple.vgtest
new file mode 100644
index 0000000..df57004
--- /dev/null
+++ b/tests/corecheck/pth_cvsimple.vgtest
@@ -0,0 +1 @@
+prog: pth_cvsimple
diff --git a/tests/corecheck/pth_empty.c b/tests/corecheck/pth_empty.c
new file mode 100644
index 0000000..c936a4c
--- /dev/null
+++ b/tests/corecheck/pth_empty.c
@@ -0,0 +1,7 @@
+// Does nothing, but linking it with -lpthread is enough to trigger an error
+// that should be suppressed when it is run.
+
+int main(void)
+{
+   return 0;
+}
diff --git a/tests/corecheck/pth_empty.stderr.exp b/tests/corecheck/pth_empty.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/tests/corecheck/pth_empty.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_empty.stderr.exp.hd b/tests/corecheck/pth_empty.stderr.exp.hd
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/tests/corecheck/pth_empty.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_empty.vgtest b/tests/corecheck/pth_empty.vgtest
new file mode 100644
index 0000000..b56f5fe
--- /dev/null
+++ b/tests/corecheck/pth_empty.vgtest
@@ -0,0 +1 @@
+prog: pth_empty
diff --git a/tests/pth_mutexspeed.c b/tests/corecheck/pth_mutexspeed.c
similarity index 100%
rename from tests/pth_mutexspeed.c
rename to tests/corecheck/pth_mutexspeed.c
diff --git a/tests/corecheck/pth_mutexspeed.stderr.exp b/tests/corecheck/pth_mutexspeed.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/tests/corecheck/pth_mutexspeed.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_mutexspeed.stderr.exp.hd b/tests/corecheck/pth_mutexspeed.stderr.exp.hd
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/tests/corecheck/pth_mutexspeed.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_mutexspeed.stdout.exp b/tests/corecheck/pth_mutexspeed.stdout.exp
new file mode 100644
index 0000000..8208168
--- /dev/null
+++ b/tests/corecheck/pth_mutexspeed.stdout.exp
@@ -0,0 +1,2 @@
+begin 100000 lock--unlocks
+done  100000 lock--unlocks
diff --git a/tests/corecheck/pth_mutexspeed.vgtest b/tests/corecheck/pth_mutexspeed.vgtest
new file mode 100644
index 0000000..3daee3a
--- /dev/null
+++ b/tests/corecheck/pth_mutexspeed.vgtest
@@ -0,0 +1 @@
+prog: pth_mutexspeed
diff --git a/tests/corecheck/pth_once.c b/tests/corecheck/pth_once.c
new file mode 100644
index 0000000..75f6a1f
--- /dev/null
+++ b/tests/corecheck/pth_once.c
@@ -0,0 +1,82 @@
+/********************************************************
+ * An example source module to accompany...
+ *
+ * "Using POSIX Threads: Programming with Pthreads"
+ *     by Brad nichols, Dick Buttlar, Jackie Farrell
+ *     O'Reilly & Associates, Inc.
+ *
+ ********************************************************
+ * once_exam.c
+ *
+ * An example of using the pthreads_once() call to execute an
+ * initialization procedure.
+ *
+ * A program spawns multiple threads and each one tries to
+ * execute the routine welcome() using the once call. Only
+ * the first thread into the once routine will actually
+ * execute welcome().
+ *
+ * The program's main thread synchronizes its exit with the
+ * exit of the threads using the pthread_join() operation.
+ *
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <pthread.h>
+
+#define  NUM_THREADS   10
+
+static pthread_once_t welcome_once_block = PTHREAD_ONCE_INIT;
+
+void welcome(void)
+{
+	printf("welcome: Welcome\n");
+}
+
+void *identify_yourself(void *arg)
+{
+        int *pid=(int *)arg;
+	int rtn;
+
+	if ((rtn = pthread_once(&welcome_once_block,
+			        welcome)) != 0) {
+		fprintf(stderr, "pthread_once failed with %d",rtn);
+		pthread_exit((void *)NULL);
+	}
+	printf("identify_yourself: Hi, I'm thread # %d\n",*pid);
+        return(NULL);
+}
+
+extern int
+main(void)
+{
+	int             *id_arg, thread_num, rtn;
+	pthread_t       threads[NUM_THREADS];
+
+	id_arg = (int *)malloc(NUM_THREADS*sizeof(int));
+
+	for (thread_num = 0; thread_num < NUM_THREADS; (thread_num)++) {
+
+		id_arg[thread_num] = thread_num;
+
+		if (( rtn = pthread_create(&threads[thread_num], 
+					   NULL,
+					   identify_yourself,
+					   (void *) &(id_arg[thread_num]))) 
+		    != 0) {
+		  fprintf(stderr, "pthread_create failed with %d",rtn);
+		  exit(1);
+		}
+	} 	
+
+	for (thread_num = 0; thread_num < NUM_THREADS; thread_num++) {
+	  pthread_join(threads[thread_num], NULL);
+	  printf("main: joined to thread %d\n", thread_num);
+	}
+	printf("main: Goodbye\n");
+        return 0;
+}
diff --git a/tests/corecheck/pth_once.stderr.exp b/tests/corecheck/pth_once.stderr.exp
new file mode 100644
index 0000000..6be4c1b
--- /dev/null
+++ b/tests/corecheck/pth_once.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
diff --git a/tests/corecheck/pth_once.stderr.exp.hd b/tests/corecheck/pth_once.stderr.exp.hd
new file mode 100644
index 0000000..515d565
--- /dev/null
+++ b/tests/corecheck/pth_once.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 40 bytes in 1 blocks.
+malloc/free: 11 allocs, 10 frees, 160 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/pth_once.stdout.exp b/tests/corecheck/pth_once.stdout.exp
new file mode 100644
index 0000000..97e25d1
--- /dev/null
+++ b/tests/corecheck/pth_once.stdout.exp
@@ -0,0 +1,22 @@
+welcome: Welcome
+identify_yourself: Hi, I'm thread # 0
+identify_yourself: Hi, I'm thread # 1
+identify_yourself: Hi, I'm thread # 2
+identify_yourself: Hi, I'm thread # 3
+identify_yourself: Hi, I'm thread # 4
+identify_yourself: Hi, I'm thread # 5
+identify_yourself: Hi, I'm thread # 6
+identify_yourself: Hi, I'm thread # 7
+identify_yourself: Hi, I'm thread # 8
+identify_yourself: Hi, I'm thread # 9
+main: joined to thread 0
+main: joined to thread 1
+main: joined to thread 2
+main: joined to thread 3
+main: joined to thread 4
+main: joined to thread 5
+main: joined to thread 6
+main: joined to thread 7
+main: joined to thread 8
+main: joined to thread 9
+main: Goodbye
diff --git a/tests/corecheck/pth_once.vgtest b/tests/corecheck/pth_once.vgtest
new file mode 100644
index 0000000..50bc5b4
--- /dev/null
+++ b/tests/corecheck/pth_once.vgtest
@@ -0,0 +1 @@
+prog: pth_once
diff --git a/tests/corecheck/sigkill.c b/tests/corecheck/sigkill.c
new file mode 100644
index 0000000..6c18d4b
--- /dev/null
+++ b/tests/corecheck/sigkill.c
@@ -0,0 +1,35 @@
+
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+
+static void
+abend (int sig)
+{
+  printf ("Abended on signal %d\n", sig);
+  exit (2);
+}
+
+int
+main (void)
+{
+  struct sigaction  sa;
+
+  int i;
+  for (i = 1; i <= 65; i++) {
+     sa.sa_flags   = 0;
+     sigemptyset( &sa.sa_mask );
+     sa.sa_handler = abend;
+     errno = 0;
+     fprintf(stderr,"setting signal %d: ", i);
+     sigaction (i /*SIGKILL*/, &sa, NULL);
+     perror ("");
+     errno = 0;
+     fprintf(stderr,"getting signal %d: ", i);
+     sigaction (i /*SIGKILL*/, NULL, &sa);
+     perror ("");
+     fprintf(stderr,"\n");
+  }
+  return 0;
+}
diff --git a/tests/corecheck/sigkill.stderr.exp b/tests/corecheck/sigkill.stderr.exp
new file mode 100644
index 0000000..563be09
--- /dev/null
+++ b/tests/corecheck/sigkill.stderr.exp
@@ -0,0 +1,202 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: attempt to set SIGKILL handler in __NR_sigaction.
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: attempt to set SIGSTOP handler in __NR_sigaction.
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 32: Success
+getting signal 32: Success
+
+setting signal 33: Success
+getting signal 33: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 63: Success
+getting signal 63: Success
+
+setting signal 64: Success
+getting signal 64: Success
+
+setting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+getting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/tests/corecheck/sigkill.stderr.exp.hd b/tests/corecheck/sigkill.stderr.exp.hd
new file mode 100644
index 0000000..69b457f
--- /dev/null
+++ b/tests/corecheck/sigkill.stderr.exp.hd
@@ -0,0 +1,206 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: attempt to set SIGKILL handler in __NR_sigaction.
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: attempt to set SIGSTOP handler in __NR_sigaction.
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 32: Success
+getting signal 32: Success
+
+setting signal 33: Success
+getting signal 33: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 63: Success
+getting signal 63: Success
+
+setting signal 64: Success
+getting signal 64: Success
+
+setting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+getting signal 65: Warning: bad signal number 65 in __NR_sigaction.
+Invalid argument
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/corecheck/sigkill.vgtest b/tests/corecheck/sigkill.vgtest
new file mode 100644
index 0000000..a681430
--- /dev/null
+++ b/tests/corecheck/sigkill.vgtest
@@ -0,0 +1 @@
+prog: sigkill
diff --git a/tests/exitprog.c b/tests/exitprog.c
deleted file mode 100644
index a3c2f4f..0000000
--- a/tests/exitprog.c
+++ /dev/null
@@ -1,15 +0,0 @@
-
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#define ZILLION 1000000
-
-void main ( void )
-{
-   int i;
-   char* a = malloc(ZILLION * sizeof(char));
-   for (i = 0; i <= ZILLION; i++) a[i] = 0;
-   a = (char*)177;
-   _exit(1);
-}
diff --git a/tests/filter_addresses b/tests/filter_addresses
new file mode 100755
index 0000000..2ea84cc
--- /dev/null
+++ b/tests/filter_addresses
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+sed "s/0x[0-9A-Fa-f]\+/0x......../g"
+
diff --git a/tests/filter_discards b/tests/filter_discards
new file mode 100755
index 0000000..a78ffb4
--- /dev/null
+++ b/tests/filter_discards
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+# Remove number and position of discards
+sed "s/discard [0-9]\+ ([0-9]\+ -> [0-9]\+) translations in range/discard ... (... -> ...) translations in range/"     |
+
+$dir/filter_addresses            |
+$dir/filter_test_paths
diff --git a/tests/filter_stderr_basic b/tests/filter_stderr_basic
new file mode 100755
index 0000000..fa2bea9
--- /dev/null
+++ b/tests/filter_stderr_basic
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+# This filter should be applied to *every* stderr results.  It removes Valgrind
+# startup stuff and pid numbers.
+
+# Remove ==pid== and --pid-- and ++pid++ strings 
+sed "s/\(==\|--\|++\)[0-9]\{3,5\}\1 //"                                 |
+
+# Remove "<name>, a <description> for x86 GNU/Linux." line
+sed "/^.*, .* for x86 GNU\/Linux\./d"                                  | 
+
+# Remove other introductory lines
+sed "/Copyright (C) 2000-2..., and GNU GPL'd, by Julian Seward\./d"    |
+sed "/Estimated CPU clock rate is [0-9]\+ MHz/d"                       |
+sed "/For more details, rerun with: -v/d"
+
+
diff --git a/tests/filter_test_paths b/tests/filter_test_paths
new file mode 100755
index 0000000..761bcaa
--- /dev/null
+++ b/tests/filter_test_paths
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+# Anonymise paths like "/local/foo/bar/tests/baz/quux" (note "tests" is there)
+sed "s/\/.*\/tests\//\/...\/tests\//"
diff --git a/tests/floored.c b/tests/floored.c
deleted file mode 100644
index 9cdf8a6..0000000
--- a/tests/floored.c
+++ /dev/null
@@ -1,17 +0,0 @@
-
-#include <math.h>
-#include <stdio.h>
-
-int xToI ( );
-
-void main ( void )
-{
-   printf ( "the answer is %d\n", xToI () );
-}
-
-
-int xToI()
-{
-    return (int)floor(2.90) + 1;
-}
-
diff --git a/tests/inline.c b/tests/inline.c
deleted file mode 100644
index 6db2fdd..0000000
--- a/tests/inline.c
+++ /dev/null
@@ -1,20 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-
-__inline__
-static int addemup ( int* arr )
-{
-   int i, j = 0;
-   for (i = 0; i <= 10; i++)
-      j += arr[i];
-   return j;
-}
-
-void main ( void )
-{
-   int sum;
-   int* a = calloc(10, sizeof(int));
-   sum = addemup(a);
-   printf("sum is %d\n", sum);
-}
diff --git a/tests/inlineh.c b/tests/inlineh.c
deleted file mode 100644
index f658065..0000000
--- a/tests/inlineh.c
+++ /dev/null
@@ -1,23 +0,0 @@
-
-#include <stdio.h>
-
-extern int burble ( int );
-
-__inline__
-static int inline_thisfile ( void )
-{
-   return burble(17);
-}
-
-#include "inlineh.h"
-
-void main ( void )
-{
-   int a;
-   a = 0;
-   a += inline_thisfile();
-   a *= 100;
-   a += inline_otherfile();
-   a /= 100;
-   printf("answer is %d\n", a);
-}
diff --git a/tests/inlineh.h b/tests/inlineh.h
deleted file mode 100644
index e34172a..0000000
--- a/tests/inlineh.h
+++ /dev/null
@@ -1,6 +0,0 @@
-
-__inline__
-static int inline_otherfile ( void )
-{
-   return burble(19);
-}
diff --git a/tests/malloc2.c b/tests/malloc2.c
deleted file mode 100644
index 2d6a0ab..0000000
--- a/tests/malloc2.c
+++ /dev/null
@@ -1,50 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-
-/* The original test driver machinery. */
-#define N_TEST_TRANSACTIONS 50000
-#define N_TEST_ARR 10000
-
-#define M_TEST_MALLOC 1000
-
-void* test_arr[N_TEST_ARR];
-
-int main ( int argc, char** argv )
-{
-   int i, j, k, nbytes;
-   unsigned char* chp;
-   char z;
-
-   for (i = 0; i < N_TEST_ARR; i++)
-      test_arr[i] = NULL;
-
-   for (i = 0; i < N_TEST_TRANSACTIONS; i++) {
-      j = random() % N_TEST_ARR;
-      if (test_arr[j]) {
-         free(test_arr[j]);
-         test_arr[j] = NULL;
-      } else {
-         nbytes = 1 + random() % M_TEST_MALLOC;
-         if (random()%64 == 32) 
-            nbytes *= 17;
-         test_arr[j] = malloc( nbytes );
-         chp = test_arr[j];
-         for (k = 1; k < nbytes; k++) 
-            chp[k] = (unsigned char)(k + 99);
-      }
-   }
-
-   for (i = 0; test_arr[i] == NULL; i++) ;
-   free(test_arr[i]);
-   ((char*)test_arr[i])[0] = 0;
-
-   for (i = 0; i < N_TEST_ARR; i++) {
-      if (test_arr[i]) {
-         free(test_arr[i]);
-         test_arr[i] = NULL;
-      }
-   }
-
-   return 0;
-}
diff --git a/tests/manuel1.c b/tests/manuel1.c
deleted file mode 100644
index d56dfe2..0000000
--- a/tests/manuel1.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <stdio.h>
-#include <malloc.h>
-
-int main ()
-{
-  int x;
-
-  printf ("x = %d\n", x);
-}
diff --git a/tests/manuel2.c b/tests/manuel2.c
deleted file mode 100644
index af362b4..0000000
--- a/tests/manuel2.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <stdio.h>
-#include <malloc.h>
-
-int main ()
-{
-  int *x;
-
-  printf ("x = %d\n", *x);
-}
diff --git a/tests/manuel3.c b/tests/manuel3.c
deleted file mode 100644
index 44ab712..0000000
--- a/tests/manuel3.c
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <stdio.h>
-#include <malloc.h>
-
-int main ()
-{
-  int *x, y;
-
-  x = (int *) malloc (sizeof (int));
-
-  y = *x == 173;
-
-  printf ("x = %d\n", y);
-}
diff --git a/tests/memcheck/.cvsignore b/tests/memcheck/.cvsignore
new file mode 100644
index 0000000..2fbaccc
--- /dev/null
+++ b/tests/memcheck/.cvsignore
@@ -0,0 +1,42 @@
+Makefile.in
+Makefile
+badaddrvalue
+badfree
+badjump
+badloop
+buflen_check
+dir
+doublefree
+errs1
+exitprog
+filter_leak_check_size
+filter_stderr
+fprw
+fwrite
+inline
+malloc1
+malloc2
+manuel1
+manuel2
+manuel3
+memalign_test
+memcmptest
+mismatches
+mmaptest
+nanoleak
+new_override
+pushfpopf
+realloc1
+realloc2
+sigaltstack
+signal2
+supp1
+supp2
+suppfree
+trivialleak
+tronical
+weirdioctl
+*.stdout.diff
+*.stderr.diff
+*.stdout.out
+*.stderr.out
diff --git a/tests/memcheck/Makefile.am b/tests/memcheck/Makefile.am
new file mode 100644
index 0000000..a36047c
--- /dev/null
+++ b/tests/memcheck/Makefile.am
@@ -0,0 +1,61 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## Need more tests:
+## - lots more mmap/munmap/mremap/mprotect ones
+
+## Notes:
+##   - HEAD and ERASER stderr.exp are different for signal2 due to lazy vs.
+##     strict EIP updating
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	badaddrvalue badfree badjump badloop buflen_check \
+	doublefree errs1 exitprog fprw fwrite inits inline \
+	malloc1 malloc2 manuel1 manuel2 manuel3 \
+	memalign_test memcmptest mmaptest nanoleak pushfpopf \
+	realloc1 realloc2 sigaltstack signal2 supp1 supp2 suppfree \
+	trivialleak tronical weirdioctl	\
+	mismatches new_override
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# C ones
+badaddrvalue_SOURCES 	= badaddrvalue.c
+badfree_SOURCES 	= badfree.c
+badjump_SOURCES 	= badjump.c
+badloop_SOURCES 	= badloop.c
+buflen_check_SOURCES	= buflen_check.c
+doublefree_SOURCES 	= doublefree.c
+errs1_SOURCES 		= errs1.c
+exitprog_SOURCES 	= exitprog.c
+fprw_SOURCES 		= fprw.c
+fwrite_SOURCES 		= fwrite.c
+inits_SOURCES		= inits.c
+inline_SOURCES 	        = inline.c
+malloc1_SOURCES 	= malloc1.c
+malloc2_SOURCES 	= malloc2.c
+manuel1_SOURCES 	= manuel1.c
+manuel2_SOURCES 	= manuel2.c
+manuel3_SOURCES 	= manuel3.c
+mmaptest_SOURCES 	= mmaptest.c
+memalign_test_SOURCES 	= memalign_test.c
+memcmptest_SOURCES 	= memcmptest.c
+nanoleak_SOURCES 	= nanoleak.c
+pushfpopf_SOURCES 	= pushfpopf_c.c pushfpopf_s.s
+realloc1_SOURCES 	= realloc1.c
+realloc2_SOURCES 	= realloc2.c
+signal2_SOURCES 	= signal2.c
+supp1_SOURCES 		= supp.c
+supp2_SOURCES 		= supp.c
+suppfree_SOURCES 	= suppfree.c
+sigaltstack_SOURCES 	= sigaltstack.c
+trivialleak_SOURCES 	= trivialleak.c
+tronical_SOURCES 	= tronical.S
+weirdioctl_SOURCES 	= weirdioctl.c
+
+# C++ ones
+mismatches_SOURCES	= mismatches.cpp
+new_override_SOURCES 	= new_override.cpp
+
diff --git a/tests/badaddrvalue.c b/tests/memcheck/badaddrvalue.c
similarity index 100%
rename from tests/badaddrvalue.c
rename to tests/memcheck/badaddrvalue.c
diff --git a/tests/memcheck/badaddrvalue.stderr.exp b/tests/memcheck/badaddrvalue.stderr.exp
new file mode 100644
index 0000000..8888c85
--- /dev/null
+++ b/tests/memcheck/badaddrvalue.stderr.exp
@@ -0,0 +1,26 @@
+
+Invalid write of size 1
+   at 0x........: main (badaddrvalue.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+Invalid read of size 1
+   at 0x........: main (badaddrvalue.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/badaddrvalue.stderr.exp.hd b/tests/memcheck/badaddrvalue.stderr.exp.hd
new file mode 100644
index 0000000..8888c85
--- /dev/null
+++ b/tests/memcheck/badaddrvalue.stderr.exp.hd
@@ -0,0 +1,26 @@
+
+Invalid write of size 1
+   at 0x........: main (badaddrvalue.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+Invalid read of size 1
+   at 0x........: main (badaddrvalue.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+   Address 0x........ is 1 bytes before a block of size 8 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (badaddrvalue.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badaddrvalue)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/badaddrvalue.stdout.exp b/tests/memcheck/badaddrvalue.stdout.exp
new file mode 100644
index 0000000..98d9bcb
--- /dev/null
+++ b/tests/memcheck/badaddrvalue.stdout.exp
@@ -0,0 +1 @@
+17
diff --git a/tests/memcheck/badaddrvalue.vgtest b/tests/memcheck/badaddrvalue.vgtest
new file mode 100644
index 0000000..91187e8
--- /dev/null
+++ b/tests/memcheck/badaddrvalue.vgtest
@@ -0,0 +1 @@
+prog: badaddrvalue
diff --git a/tests/memcheck/badfree-2trace.stderr.exp b/tests/memcheck/badfree-2trace.stderr.exp
new file mode 100644
index 0000000..741fd25
--- /dev/null
+++ b/tests/memcheck/badfree-2trace.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:12)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:15)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/badfree-2trace.vgtest b/tests/memcheck/badfree-2trace.vgtest
new file mode 100644
index 0000000..8a60456
--- /dev/null
+++ b/tests/memcheck/badfree-2trace.vgtest
@@ -0,0 +1,2 @@
+vgopts: --num-callers=2
+prog:   badfree
diff --git a/tests/memcheck/badfree.c b/tests/memcheck/badfree.c
new file mode 100644
index 0000000..3a22567
--- /dev/null
+++ b/tests/memcheck/badfree.c
@@ -0,0 +1,18 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int main ( void )
+{
+   void* p = (void*)0x87654321;
+   int q[] = { 1, 2, 3 };
+   
+   /* Free a pointer to Never-Never Land */
+   free(p);
+
+   /* Free a pointer to a stack block */
+   free(q);
+
+   return 0;
+}
diff --git a/tests/memcheck/badfree.stderr.exp b/tests/memcheck/badfree.stderr.exp
new file mode 100644
index 0000000..37c9b3e
--- /dev/null
+++ b/tests/memcheck/badfree.stderr.exp
@@ -0,0 +1,20 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/badfree)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (badfree.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/badfree)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 2 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/badfree.vgtest b/tests/memcheck/badfree.vgtest
new file mode 100644
index 0000000..455863a
--- /dev/null
+++ b/tests/memcheck/badfree.vgtest
@@ -0,0 +1 @@
+prog: badfree
diff --git a/tests/badjump.c b/tests/memcheck/badjump.c
similarity index 100%
rename from tests/badjump.c
rename to tests/memcheck/badjump.c
diff --git a/tests/memcheck/badjump.stderr.exp b/tests/memcheck/badjump.stderr.exp
new file mode 100644
index 0000000..1be7f70
--- /dev/null
+++ b/tests/memcheck/badjump.stderr.exp
@@ -0,0 +1,6 @@
+
+Jump to the invalid address stated on the next line
+   at 0x........: ???
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+   Address 0x........ is not stack'd, malloc'd or free'd
diff --git a/tests/memcheck/badjump.vgtest b/tests/memcheck/badjump.vgtest
new file mode 100644
index 0000000..1e82b86
--- /dev/null
+++ b/tests/memcheck/badjump.vgtest
@@ -0,0 +1 @@
+prog: badjump
diff --git a/tests/badloop.c b/tests/memcheck/badloop.c
similarity index 100%
rename from tests/badloop.c
rename to tests/memcheck/badloop.c
diff --git a/tests/memcheck/badloop.stderr.exp b/tests/memcheck/badloop.stderr.exp
new file mode 100644
index 0000000..ebfa1c2
--- /dev/null
+++ b/tests/memcheck/badloop.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (badloop.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/badloop)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/badloop.vgtest b/tests/memcheck/badloop.vgtest
new file mode 100644
index 0000000..abd0f39
--- /dev/null
+++ b/tests/memcheck/badloop.vgtest
@@ -0,0 +1 @@
+prog: badloop
diff --git a/tests/memcheck/buflen_check.c b/tests/memcheck/buflen_check.c
new file mode 100644
index 0000000..25f1714
--- /dev/null
+++ b/tests/memcheck/buflen_check.c
@@ -0,0 +1,29 @@
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(void)
+{
+   struct sockaddr name;
+   int res1, res2;
+   int len = 10;
+
+   res1 = socket(PF_UNIX, SOCK_STREAM, 0);
+   if (res1 == 0) {
+      fprintf(stderr, "socket() failed\n");
+      exit(1);
+   }
+
+   /* Valgrind 1.0.X doesn't report the second error */
+   res1 = getsockname(-1, NULL,  &len);    /* NULL is bogus */
+   res2 = getsockname(-1, &name, NULL);    /* NULL is bogus */
+   if (res1 == -1) {
+      fprintf(stderr, "getsockname(1) failed\n");
+   }
+   if (res2 == -1) {
+      fprintf(stderr, "getsockname(2) failed\n");
+   }
+   
+   return 0;
+}
+
diff --git a/tests/memcheck/buflen_check.stderr.exp b/tests/memcheck/buflen_check.stderr.exp
new file mode 100644
index 0000000..a1b9b36
--- /dev/null
+++ b/tests/memcheck/buflen_check.stderr.exp
@@ -0,0 +1,20 @@
+
+Syscall param socketcall.getsockname(name) contains unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Syscall param socketcall.getsockname(namelen_in) contains uninitialised or unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+getsockname(1) failed
+getsockname(2) failed
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/buflen_check.stderr.exp.hd b/tests/memcheck/buflen_check.stderr.exp.hd
new file mode 100644
index 0000000..855f51e
--- /dev/null
+++ b/tests/memcheck/buflen_check.stderr.exp.hd
@@ -0,0 +1,14 @@
+
+Syscall param socketcall.getsockname(name) contains unaddressable byte(s)
+   at 0x........: getsockname (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: socket@@GLIBC_2.0 (in /.../tests/memcheck/buflen_check)
+   Address 0x........ is not stack'd, malloc'd or free'd
+getsockname(1) failed
+getsockname(2) failed
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/buflen_check.vgtest b/tests/memcheck/buflen_check.vgtest
new file mode 100644
index 0000000..e14c8f1
--- /dev/null
+++ b/tests/memcheck/buflen_check.vgtest
@@ -0,0 +1 @@
+prog: buflen_check
diff --git a/tests/doublefree.c b/tests/memcheck/doublefree.c
similarity index 100%
rename from tests/doublefree.c
rename to tests/memcheck/doublefree.c
diff --git a/tests/memcheck/doublefree.stderr.exp b/tests/memcheck/doublefree.stderr.exp
new file mode 100644
index 0000000..282523f
--- /dev/null
+++ b/tests/memcheck/doublefree.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (doublefree.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/doublefree)
+   Address 0x........ is 0 bytes inside a block of size 177 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (doublefree.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/doublefree)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 2 frees, 177 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/doublefree.vgtest b/tests/memcheck/doublefree.vgtest
new file mode 100644
index 0000000..9c0efac
--- /dev/null
+++ b/tests/memcheck/doublefree.vgtest
@@ -0,0 +1 @@
+prog: doublefree
diff --git a/tests/errs1.c b/tests/memcheck/errs1.c
similarity index 100%
rename from tests/errs1.c
rename to tests/memcheck/errs1.c
diff --git a/tests/memcheck/errs1.stderr.exp b/tests/memcheck/errs1.stderr.exp
new file mode 100644
index 0000000..2de4b48
--- /dev/null
+++ b/tests/memcheck/errs1.stderr.exp
@@ -0,0 +1,28 @@
+
+Invalid read of size 1
+   at 0x........: ddd (errs1.c:7)
+   by 0x........: bbb (errs1.c:9)
+   by 0x........: aaa (errs1.c:10)
+   by 0x........: main (errs1.c:17)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: zzzzzzz (errs1.c:12)
+   by 0x........: yyy (errs1.c:13)
+   by 0x........: xxx (errs1.c:14)
+
+Invalid write of size 1
+   at 0x........: ddd (errs1.c:7)
+   by 0x........: bbb (errs1.c:9)
+   by 0x........: aaa (errs1.c:10)
+   by 0x........: main (errs1.c:17)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: zzzzzzz (errs1.c:12)
+   by 0x........: yyy (errs1.c:13)
+   by 0x........: xxx (errs1.c:14)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/errs1.vgtest b/tests/memcheck/errs1.vgtest
new file mode 100644
index 0000000..fbe7c76
--- /dev/null
+++ b/tests/memcheck/errs1.vgtest
@@ -0,0 +1 @@
+prog: errs1
diff --git a/tests/memcheck/exitprog.c b/tests/memcheck/exitprog.c
new file mode 100644
index 0000000..3067216
--- /dev/null
+++ b/tests/memcheck/exitprog.c
@@ -0,0 +1,16 @@
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define ZILLION 1000000
+
+int main ( void )
+{
+   int i;
+   char* a = malloc(ZILLION * sizeof(char));
+   for (i = 0; i <= ZILLION; i++) a[i] = 0;
+   a = (char*)177;
+   _exit(1);
+}
diff --git a/tests/memcheck/exitprog.stderr.exp b/tests/memcheck/exitprog.stderr.exp
new file mode 100644
index 0000000..1b30fe0
--- /dev/null
+++ b/tests/memcheck/exitprog.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid write of size 1
+   at 0x........: main (exitprog.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/exitprog)
+   Address 0x........ is 0 bytes after a block of size 1000000 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (exitprog.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/exitprog)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000000 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 1000000 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/exitprog.vgtest b/tests/memcheck/exitprog.vgtest
new file mode 100644
index 0000000..0095028
--- /dev/null
+++ b/tests/memcheck/exitprog.vgtest
@@ -0,0 +1 @@
+prog: exitprog
diff --git a/tests/memcheck/filter_leak_check_size b/tests/memcheck/filter_leak_check_size
new file mode 100755
index 0000000..03def89
--- /dev/null
+++ b/tests/memcheck/filter_leak_check_size
@@ -0,0 +1,4 @@
+#! /bin/sh
+
+./filter_stderr | \
+sed "s/checked [0-9]\+ bytes./checked ... bytes./"
diff --git a/tests/memcheck/filter_stderr b/tests/memcheck/filter_stderr
new file mode 100755
index 0000000..0d5e763
--- /dev/null
+++ b/tests/memcheck/filter_stderr
@@ -0,0 +1,24 @@
+#! /bin/sh
+
+# Skip first four lines (valgrind intro)  
+# XXX: be more clever/subtle; eg. if there's just a 1-line error message
+# don't cut it
+
+dir=`dirname $0`
+
+$dir/../filter_stderr_basic                             |
+
+# Anonymise addresses
+$dir/../filter_addresses                                |
+
+# Anonymise line numbers in vg_clientfuncs.c
+sed "s/vg_clientfuncs.c:[0-9]\+/vg_clientfuncs.c:.../"  |
+
+$dir/../filter_test_paths                               |
+
+# Anonymise paths like "(in /foo/bar/libc-baz.so)"
+sed "s/(in \/.*libc.*)$/(in \/...libc...)/"             |
+
+# Anonymise paths like "__libc_start_main (../foo/bar/libc-quux.c:129)"
+sed "s/__libc_\(.*\) (.*)$/__libc_\1 (...libc...)/"
+
diff --git a/tests/fprw.c b/tests/memcheck/fprw.c
similarity index 100%
rename from tests/fprw.c
rename to tests/memcheck/fprw.c
diff --git a/tests/memcheck/fprw.stderr.exp b/tests/memcheck/fprw.stderr.exp
new file mode 100644
index 0000000..2137572
--- /dev/null
+++ b/tests/memcheck/fprw.stderr.exp
@@ -0,0 +1,83 @@
+
+Use of uninitialised value of size 8
+   at 0x........: main (fprw.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 4
+   at 0x........: main (fprw.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 8
+   at 0x........: main (fprw.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Use of uninitialised value of size 4
+   at 0x........: main (fprw.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid read of size 8
+   at 0x........: main (fprw.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 8 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid write of size 8
+   at 0x........: main (fprw.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 8 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid read of size 4
+   at 0x........: main (fprw.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid write of size 4
+   at 0x........: main (fprw.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+Invalid write of size 8
+   at 0x........: main (fprw.c:24)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (fprw.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/fprw)
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 3 allocs, 3 frees, 16 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/fprw.vgtest b/tests/memcheck/fprw.vgtest
new file mode 100644
index 0000000..d44e04a
--- /dev/null
+++ b/tests/memcheck/fprw.vgtest
@@ -0,0 +1,2 @@
+vgopts: --single-step=yes
+prog:   fprw
diff --git a/tests/fwrite.c b/tests/memcheck/fwrite.c
similarity index 100%
rename from tests/fwrite.c
rename to tests/memcheck/fwrite.c
diff --git a/tests/memcheck/fwrite.stderr.exp b/tests/memcheck/fwrite.stderr.exp
new file mode 100644
index 0000000..9c26de2
--- /dev/null
+++ b/tests/memcheck/fwrite.stderr.exp
@@ -0,0 +1,16 @@
+
+Syscall param write(buf) contains uninitialised or unaddressable byte(s)
+   at 0x........: __libc_write (...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (fwrite.c:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/fwrite.stdout.exp b/tests/memcheck/fwrite.stdout.exp
new file mode 100644
index 0000000..cb43b5c
--- /dev/null
+++ b/tests/memcheck/fwrite.stdout.exp
Binary files differ
diff --git a/tests/memcheck/fwrite.vgtest b/tests/memcheck/fwrite.vgtest
new file mode 100644
index 0000000..f43efd0
--- /dev/null
+++ b/tests/memcheck/fwrite.vgtest
@@ -0,0 +1 @@
+prog: fwrite
diff --git a/tests/memcheck/inits.c b/tests/memcheck/inits.c
new file mode 100644
index 0000000..7dd0c93
--- /dev/null
+++ b/tests/memcheck/inits.c
@@ -0,0 +1,20 @@
+
+#include <stdio.h>
+
+/* Static and global vars are inited to zero, non-static local vars aren't. */
+
+int        g;
+static int gs;
+
+int main(void)
+{
+   int        l;
+   static int ls;
+   
+   if (gs == 0xDEADBEEF) printf("1!\n");
+   if (g  == 0xDEADBEEF) printf("2!\n");
+   if (ls == 0xDEADBEEF) printf("3!\n");
+   if (l  == 0xDEADBEEF) printf("4!\n");  // complains
+   
+   return 0;
+}
diff --git a/tests/memcheck/inits.stderr.exp b/tests/memcheck/inits.stderr.exp
new file mode 100644
index 0000000..e703ced
--- /dev/null
+++ b/tests/memcheck/inits.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (inits.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/inits)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/inits.vgtest b/tests/memcheck/inits.vgtest
new file mode 100644
index 0000000..e654dc6
--- /dev/null
+++ b/tests/memcheck/inits.vgtest
@@ -0,0 +1 @@
+prog: inits
diff --git a/tests/memcheck/inline.c b/tests/memcheck/inline.c
new file mode 100644
index 0000000..cb023b2
--- /dev/null
+++ b/tests/memcheck/inline.c
@@ -0,0 +1,21 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+
+__inline__
+static int addemup ( int* arr )
+{
+   int i, j = 0;
+   for (i = 0; i <= 10; i++)
+      j += arr[i];
+   return j;
+}
+
+int main ( void )
+{
+   int sum;
+   int* a = calloc(10, sizeof(int));
+   sum = addemup(a);
+   printf("sum is %d\n", sum);
+   return 0;
+}
diff --git a/tests/memcheck/inline.stderr.exp b/tests/memcheck/inline.stderr.exp
new file mode 100644
index 0000000..a2225c1
--- /dev/null
+++ b/tests/memcheck/inline.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid read of size 4
+   at 0x........: addemup (inline.c:10)
+   by 0x........: main (inline.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: calloc@@GLIBC_2.0 (in /.../tests/memcheck/inline)
+   Address 0x........ is 0 bytes after a block of size 40 alloc'd
+   at 0x........: calloc (vg_clientfuncs.c:...)
+   by 0x........: main (inline.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: calloc@@GLIBC_2.0 (in /.../tests/memcheck/inline)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 40 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 40 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/inline.stdout.exp b/tests/memcheck/inline.stdout.exp
new file mode 100644
index 0000000..ad1401e
--- /dev/null
+++ b/tests/memcheck/inline.stdout.exp
@@ -0,0 +1 @@
+sum is 0
diff --git a/tests/memcheck/inline.vgtest b/tests/memcheck/inline.vgtest
new file mode 100644
index 0000000..89673b1
--- /dev/null
+++ b/tests/memcheck/inline.vgtest
@@ -0,0 +1 @@
+prog: inline
diff --git a/tests/malloc1.c b/tests/memcheck/malloc1.c
similarity index 100%
rename from tests/malloc1.c
rename to tests/memcheck/malloc1.c
diff --git a/tests/memcheck/malloc1.stderr.exp b/tests/memcheck/malloc1.stderr.exp
new file mode 100644
index 0000000..1e4c67f
--- /dev/null
+++ b/tests/memcheck/malloc1.stderr.exp
@@ -0,0 +1,28 @@
+
+Invalid write of size 1
+   at 0x........: really (malloc1.c:20)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc1)
+   Address 0x........ is 1 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: really (malloc1.c:19)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+
+Invalid write of size 1
+   at 0x........: really (malloc1.c:23)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc1)
+   Address 0x........ is 1 bytes before a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: really (malloc1.c:21)
+   by 0x........: main (malloc1.c:10)
+   by 0x........: __libc_start_main (...libc...)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 10 bytes in 1 blocks.
+malloc/free: 2 allocs, 1 frees, 20 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/malloc1.vgtest b/tests/memcheck/malloc1.vgtest
new file mode 100644
index 0000000..43c402a
--- /dev/null
+++ b/tests/memcheck/malloc1.vgtest
@@ -0,0 +1 @@
+prog: malloc1
diff --git a/tests/memcheck/malloc2.c b/tests/memcheck/malloc2.c
new file mode 100644
index 0000000..44cc7bb
--- /dev/null
+++ b/tests/memcheck/malloc2.c
@@ -0,0 +1,49 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* The original test driver machinery. */
+#define N_TEST_TRANSACTIONS 500
+#define N_TEST_ARR 2000
+
+#define M_TEST_MALLOC 1000
+
+void* test_arr[N_TEST_ARR];
+
+int main ( int argc, char** argv )
+{
+   int i, j, k, nbytes;
+   unsigned char* chp;
+
+   for (i = 0; i < N_TEST_ARR; i++)
+      test_arr[i] = NULL;
+
+   for (i = 0; i < N_TEST_TRANSACTIONS; i++) {
+      j = random() % N_TEST_ARR;
+      if (test_arr[j]) {
+         free(test_arr[j]);
+         test_arr[j] = NULL;
+      } else {
+         nbytes = 1 + random() % M_TEST_MALLOC;
+         if (random()%64 == 32) 
+            nbytes *= 17;
+         test_arr[j] = malloc( nbytes );
+         chp = test_arr[j];
+         for (k = 1; k < nbytes; k++) 
+            chp[k] = (unsigned char)(k + 99);
+      }
+   }
+
+   for (i = 0; test_arr[i] == NULL; i++) ;
+   free(test_arr[i]);
+   ((char*)test_arr[i])[0] = 0;
+
+   for (i = 0; i < N_TEST_ARR; i++) {
+      if (test_arr[i]) {
+         free(test_arr[i]);
+         test_arr[i] = NULL;
+      }
+   }
+
+   return 0;
+}
diff --git a/tests/memcheck/malloc2.stderr.exp b/tests/memcheck/malloc2.stderr.exp
new file mode 100644
index 0000000..dd86b2d
--- /dev/null
+++ b/tests/memcheck/malloc2.stderr.exp
@@ -0,0 +1,27 @@
+
+Invalid write of size 1
+   at 0x........: main (malloc2.c:39)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+   Address 0x........ is 0 bytes inside a block of size 429 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:38)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:43)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+   Address 0x........ is 0 bytes inside a block of size 429 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (malloc2.c:38)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: free@@GLIBC_2.0 (in /.../tests/memcheck/malloc2)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 443 allocs, 444 frees, 265463 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/malloc2.vgtest b/tests/memcheck/malloc2.vgtest
new file mode 100644
index 0000000..d2dd1b4
--- /dev/null
+++ b/tests/memcheck/malloc2.vgtest
@@ -0,0 +1 @@
+prog: malloc2
diff --git a/tests/memcheck/manuel1.c b/tests/memcheck/manuel1.c
new file mode 100644
index 0000000..ac1f3c8
--- /dev/null
+++ b/tests/memcheck/manuel1.c
@@ -0,0 +1,10 @@
+#include <stdio.h>
+
+int main ()
+{
+  int x;
+
+  printf ("x = %d\n", x==0xDEADBEEF ? 99 : 88);
+
+  return 0;
+}
diff --git a/tests/memcheck/manuel1.stderr.exp b/tests/memcheck/manuel1.stderr.exp
new file mode 100644
index 0000000..c674937
--- /dev/null
+++ b/tests/memcheck/manuel1.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (manuel1.c:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/manuel1)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/manuel1.stdout.exp b/tests/memcheck/manuel1.stdout.exp
new file mode 100644
index 0000000..d26cbc9
--- /dev/null
+++ b/tests/memcheck/manuel1.stdout.exp
@@ -0,0 +1 @@
+x = 88
diff --git a/tests/memcheck/manuel1.vgtest b/tests/memcheck/manuel1.vgtest
new file mode 100644
index 0000000..e3ad9c7
--- /dev/null
+++ b/tests/memcheck/manuel1.vgtest
@@ -0,0 +1 @@
+prog: manuel1
diff --git a/tests/memcheck/manuel2.c b/tests/memcheck/manuel2.c
new file mode 100644
index 0000000..3b7135e
--- /dev/null
+++ b/tests/memcheck/manuel2.c
@@ -0,0 +1,11 @@
+#include <stdio.h>
+#include <malloc.h>
+
+int main ()
+{
+  int *x;
+
+  printf ("x = %d\n", *x==0xDEADBEEF ? 99 : 88);
+
+  return 0;
+}
diff --git a/tests/memcheck/manuel2.stderr.exp b/tests/memcheck/manuel2.stderr.exp
new file mode 100644
index 0000000..55ff720
--- /dev/null
+++ b/tests/memcheck/manuel2.stderr.exp
@@ -0,0 +1,11 @@
+
+Use of uninitialised value of size 4
+   at 0x........: main (manuel2.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/manuel2)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/manuel2.stdout.exp b/tests/memcheck/manuel2.stdout.exp
new file mode 100644
index 0000000..d26cbc9
--- /dev/null
+++ b/tests/memcheck/manuel2.stdout.exp
@@ -0,0 +1 @@
+x = 88
diff --git a/tests/memcheck/manuel2.vgtest b/tests/memcheck/manuel2.vgtest
new file mode 100644
index 0000000..1c785a5
--- /dev/null
+++ b/tests/memcheck/manuel2.vgtest
@@ -0,0 +1 @@
+prog: manuel2
diff --git a/tests/memcheck/manuel3.c b/tests/memcheck/manuel3.c
new file mode 100644
index 0000000..ea98fa9
--- /dev/null
+++ b/tests/memcheck/manuel3.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <malloc.h>
+
+int gcc_cant_inline_me ( int );
+
+int main ()
+{
+  int *x, y;
+
+  x = (int *) malloc (sizeof (int));
+
+  y = *x == 173;
+
+  if (gcc_cant_inline_me(y)) { } 
+
+  return 0;
+}
+
+/* must be AFTER main */
+int gcc_cant_inline_me ( int n )
+{
+   if (n == 42) 
+      return 1; /* forty-two, dudes! */
+   else
+      return 0; /* some other number, dudes! */
+}
+
+
diff --git a/tests/memcheck/manuel3.stderr.exp b/tests/memcheck/manuel3.stderr.exp
new file mode 100644
index 0000000..7a257c3
--- /dev/null
+++ b/tests/memcheck/manuel3.stderr.exp
@@ -0,0 +1,12 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: gcc_cant_inline_me (manuel3.c:22)
+   by 0x........: main (manuel3.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 4 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/manuel3.vgtest b/tests/memcheck/manuel3.vgtest
new file mode 100644
index 0000000..0481cc6
--- /dev/null
+++ b/tests/memcheck/manuel3.vgtest
@@ -0,0 +1 @@
+prog: manuel3
diff --git a/tests/memalign_test.c b/tests/memcheck/memalign_test.c
similarity index 100%
rename from tests/memalign_test.c
rename to tests/memcheck/memalign_test.c
diff --git a/tests/memcheck/memalign_test.stderr.exp b/tests/memcheck/memalign_test.stderr.exp
new file mode 100644
index 0000000..4725928
--- /dev/null
+++ b/tests/memcheck/memalign_test.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (memalign_test.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: valloc@@GLIBC_2.0 (in /.../tests/memcheck/memalign_test)
+   Address 0x........ is 0 bytes inside a block of size 111110 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (memalign_test.c:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: valloc@@GLIBC_2.0 (in /.../tests/memcheck/memalign_test)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 10 allocs, 11 frees, 611105 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/memalign_test.vgtest b/tests/memcheck/memalign_test.vgtest
new file mode 100644
index 0000000..56b601c
--- /dev/null
+++ b/tests/memcheck/memalign_test.vgtest
@@ -0,0 +1 @@
+prog: memalign_test
diff --git a/tests/memcheck/memcmptest.c b/tests/memcheck/memcmptest.c
new file mode 100644
index 0000000..83eb2d4
--- /dev/null
+++ b/tests/memcheck/memcmptest.c
@@ -0,0 +1,20 @@
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+char* s1;
+char* s2;
+
+int main ( void )
+{
+  s1 = malloc(10); strcpy(s1,"fooble");
+  s2 = malloc(10); strcpy(s2,"fooble");
+  if (memcmp(s1, s2, 8) != 0)
+    printf("different\n");
+  else
+    printf("same (?!)\n");
+  return 0;
+}
+
+	
diff --git a/tests/memcheck/memcmptest.stderr.exp b/tests/memcheck/memcmptest.stderr.exp
new file mode 100644
index 0000000..d7b1c3a
--- /dev/null
+++ b/tests/memcheck/memcmptest.stderr.exp
@@ -0,0 +1,16 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: memcmp (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/memcmptest)
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: memcmp (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/memcmptest)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 20 bytes in 2 blocks.
+malloc/free: 2 allocs, 0 frees, 20 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/memcmptest.stdout.exp b/tests/memcheck/memcmptest.stdout.exp
new file mode 100644
index 0000000..7164804
--- /dev/null
+++ b/tests/memcheck/memcmptest.stdout.exp
@@ -0,0 +1 @@
+same (?!)
diff --git a/tests/memcheck/memcmptest.vgtest b/tests/memcheck/memcmptest.vgtest
new file mode 100644
index 0000000..f31a8f5
--- /dev/null
+++ b/tests/memcheck/memcmptest.vgtest
@@ -0,0 +1 @@
+prog: memcmptest
diff --git a/tests/memcheck/mismatches.cpp b/tests/memcheck/mismatches.cpp
new file mode 100644
index 0000000..857a075
--- /dev/null
+++ b/tests/memcheck/mismatches.cpp
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+
+int main()
+{
+  int* fpointer = (int*)malloc(10);
+  delete fpointer;          // should give warning
+  fpointer = (int*)malloc(10);
+  delete [] fpointer;       // should give warning
+  fpointer = (int*)malloc(10);
+  free (fpointer);          // should work!
+
+  int* nvec = new int[10];
+  delete nvec;              // should give a warning
+  nvec = new int[10];
+  free (nvec);              // should give a warning
+  nvec = new int[10];
+  delete [] nvec;           // should work!
+
+  int* n = new int;
+  delete [] n;              // should give a warning
+  n = new int;
+  free(n);                  // should give a warning
+  n = new int;
+  delete n;                 // should work!
+
+  return 0;
+}
diff --git a/tests/memcheck/mismatches.stderr.exp b/tests/memcheck/mismatches.stderr.exp
new file mode 100644
index 0000000..caf65dc
--- /dev/null
+++ b/tests/memcheck/mismatches.stderr.exp
@@ -0,0 +1,72 @@
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:5)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_vec_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 10 alloc'd
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:7)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:13)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 40 alloc'd
+   at 0x........: __builtin_vec_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:15)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 40 alloc'd
+   at 0x........: __builtin_vec_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: __builtin_vec_delete (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: __builtin_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+Mismatched free() / delete / delete []
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+   Address 0x........ is 0 bytes inside a block of size 4 alloc'd
+   at 0x........: __builtin_new (vg_clientfuncs.c:...)
+   by 0x........: main (mismatches.cpp:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __builtin_new (in /.../tests/memcheck/mismatches)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 9 allocs, 9 frees, 162 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/mismatches.vgtest b/tests/memcheck/mismatches.vgtest
new file mode 100644
index 0000000..3a87ef0
--- /dev/null
+++ b/tests/memcheck/mismatches.vgtest
@@ -0,0 +1 @@
+prog: mismatches
diff --git a/tests/memcheck/mmaptest.c b/tests/memcheck/mmaptest.c
new file mode 100644
index 0000000..74a21ed
--- /dev/null
+++ b/tests/memcheck/mmaptest.c
@@ -0,0 +1,15 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+int main()
+{
+    int fd;
+
+    mkdir("dir", 0777);
+    fd = open("dir", O_RDONLY);
+    mmap(NULL, 4711, PROT_READ, MAP_PRIVATE, fd, 0);
+    return 0;
+}
diff --git a/tests/memcheck/mmaptest.stderr.exp b/tests/memcheck/mmaptest.stderr.exp
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/memcheck/mmaptest.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/mmaptest.vgtest b/tests/memcheck/mmaptest.vgtest
new file mode 100644
index 0000000..1540c20
--- /dev/null
+++ b/tests/memcheck/mmaptest.vgtest
@@ -0,0 +1 @@
+prog: mmaptest
diff --git a/tests/nanoleak.c b/tests/memcheck/nanoleak.c
similarity index 100%
rename from tests/nanoleak.c
rename to tests/memcheck/nanoleak.c
diff --git a/tests/memcheck/nanoleak.stderr.exp b/tests/memcheck/nanoleak.stderr.exp
new file mode 100644
index 0000000..8dc3ae7
--- /dev/null
+++ b/tests/memcheck/nanoleak.stderr.exp
@@ -0,0 +1,26 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 1000 bytes allocated.
+For counts of detected errors, rerun with: -v
+searching for pointers to 1 not-freed blocks.
+checked ... bytes.
+
+definitely lost: 1000 bytes in 1 blocks.
+possibly lost:   0 bytes in 0 blocks.
+still reachable: 0 bytes in 0 blocks.
+
+1000 bytes in 1 blocks are definitely lost in loss record 1 of 1
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: main (nanoleak.c:6)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+LEAK SUMMARY:
+   definitely lost: 1000 bytes in 1 blocks.
+   possibly lost:   0 bytes in 0 blocks.
+   still reachable: 0 bytes in 0 blocks.
+Reachable blocks (those to which a pointer was found) are not shown.
+To see them, rerun with: --show-reachable=yes
+
diff --git a/tests/memcheck/nanoleak.vgtest b/tests/memcheck/nanoleak.vgtest
new file mode 100644
index 0000000..2fadc98
--- /dev/null
+++ b/tests/memcheck/nanoleak.vgtest
@@ -0,0 +1,3 @@
+vgopts: --leak-check=yes
+prog: nanoleak
+stderr_filter: filter_leak_check_size
diff --git a/tests/memcheck/new_override.cpp b/tests/memcheck/new_override.cpp
new file mode 100644
index 0000000..5b48611
--- /dev/null
+++ b/tests/memcheck/new_override.cpp
@@ -0,0 +1,30 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+class Test {
+public:
+  int a, b, c, d;
+};
+
+void *operator new(size_t size)
+{
+  void *ret = malloc(size);
+  printf("Here.\n");
+  for (unsigned int i = 0; i < size; i++) ((char *) ret)[i] = 0xFF;
+  return ret;
+}
+
+int main(int argc, char *argv[]) {
+  Test *toto;
+  int i;
+  int j = 0;
+
+  toto = new Test[2];
+
+  for (i = 0; i < 2; i++) {
+    if (toto[i].a) {
+      j++;
+    }
+    //printf("%d : %08x %08x %08x %08x\n", i, toto[i].a, toto[i].b, toto[i].c, toto[i].d);
+  }
+}
diff --git a/tests/memcheck/new_override.stderr.exp b/tests/memcheck/new_override.stderr.exp
new file mode 100644
index 0000000..8ba31b6
--- /dev/null
+++ b/tests/memcheck/new_override.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (new_override.cpp:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/new_override)
+
+ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 32 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 32 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/new_override.vgtest b/tests/memcheck/new_override.vgtest
new file mode 100644
index 0000000..4f11a5b
--- /dev/null
+++ b/tests/memcheck/new_override.vgtest
@@ -0,0 +1 @@
+prog: new_override
diff --git a/tests/memcheck/pushfpopf.stderr.exp b/tests/memcheck/pushfpopf.stderr.exp
new file mode 100644
index 0000000..8e10fb2
--- /dev/null
+++ b/tests/memcheck/pushfpopf.stderr.exp
@@ -0,0 +1,12 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: fooble (in /.../tests/memcheck/pushfpopf)
+   by 0x........: main (pushfpopf_c.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: (within /.../tests/memcheck/pushfpopf)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/pushfpopf.stdout.exp b/tests/memcheck/pushfpopf.stdout.exp
new file mode 100644
index 0000000..180f871
--- /dev/null
+++ b/tests/memcheck/pushfpopf.stdout.exp
@@ -0,0 +1 @@
+fooble: result is 22
diff --git a/tests/memcheck/pushfpopf.vgtest b/tests/memcheck/pushfpopf.vgtest
new file mode 100644
index 0000000..f87b791
--- /dev/null
+++ b/tests/memcheck/pushfpopf.vgtest
@@ -0,0 +1 @@
+prog: pushfpopf
diff --git a/tests/pushfpopf_c.c b/tests/memcheck/pushfpopf_c.c
similarity index 100%
rename from tests/pushfpopf_c.c
rename to tests/memcheck/pushfpopf_c.c
diff --git a/tests/pushfpopf.s b/tests/memcheck/pushfpopf_s.s
similarity index 100%
rename from tests/pushfpopf.s
rename to tests/memcheck/pushfpopf_s.s
diff --git a/tests/realloc1.c b/tests/memcheck/realloc1.c
similarity index 100%
rename from tests/realloc1.c
rename to tests/memcheck/realloc1.c
diff --git a/tests/memcheck/realloc1.stderr.exp b/tests/memcheck/realloc1.stderr.exp
new file mode 100644
index 0000000..14ec594
--- /dev/null
+++ b/tests/memcheck/realloc1.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 49 bytes in 1 blocks.
+malloc/free: 49 allocs, 48 frees, 1225 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/realloc1.vgtest b/tests/memcheck/realloc1.vgtest
new file mode 100644
index 0000000..d0d10d7
--- /dev/null
+++ b/tests/memcheck/realloc1.vgtest
@@ -0,0 +1 @@
+prog: realloc1
diff --git a/tests/memcheck/realloc2.c b/tests/memcheck/realloc2.c
new file mode 100644
index 0000000..c89ff8a
--- /dev/null
+++ b/tests/memcheck/realloc2.c
@@ -0,0 +1,21 @@
+/* This test demonstrated an obscure bug in malloclists handling caused by
+   multiple blocks hashing to the same list and one being overwritten at
+   realloc time due to bad ordering of the things happening.  Now runs
+   without error. */
+
+#include <malloc.h>
+#include <stdio.h>
+
+int main ( void )
+{
+  char* p;
+  int i;
+  for (i = 0; i < 10000; i++) {
+    p = malloc(10 + 10 * (i % 100));
+    p = realloc(p, 500);
+    p = realloc(p, 600);
+    free(p);
+  }
+  return 0;
+}
+
diff --git a/tests/memcheck/realloc2.stderr.exp b/tests/memcheck/realloc2.stderr.exp
new file mode 100644
index 0000000..8ed8426
--- /dev/null
+++ b/tests/memcheck/realloc2.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 30000 allocs, 30000 frees, 16050000 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/realloc2.vgtest b/tests/memcheck/realloc2.vgtest
new file mode 100644
index 0000000..0a28b23
--- /dev/null
+++ b/tests/memcheck/realloc2.vgtest
@@ -0,0 +1 @@
+prog: realloc2
diff --git a/tests/sigaltstack.c b/tests/memcheck/sigaltstack.c
similarity index 100%
rename from tests/sigaltstack.c
rename to tests/memcheck/sigaltstack.c
diff --git a/tests/memcheck/sigaltstack.stderr.exp b/tests/memcheck/sigaltstack.stderr.exp
new file mode 100644
index 0000000..ceeb462
--- /dev/null
+++ b/tests/memcheck/sigaltstack.stderr.exp
@@ -0,0 +1,19 @@
+
+calling sigaltstack, stack base is 0x........
+setting sigaction
+Syscall param sigaction(act) contains uninitialised or unaddressable byte(s)
+   at 0x........: __libc_sigaction (...libc...)
+   by 0x........: main (sigaltstack.c:27)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: sigaltstack@@GLIBC_2.0 (in /.../tests/memcheck/sigaltstack)
+   Address 0x........ is on thread 1's stack
+res = 0
+raising the signal
+caught signal, local var is on 0x........
+done
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 8192 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 8192 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/sigaltstack.vgtest b/tests/memcheck/sigaltstack.vgtest
new file mode 100644
index 0000000..f61eeae
--- /dev/null
+++ b/tests/memcheck/sigaltstack.vgtest
@@ -0,0 +1 @@
+prog: sigaltstack
diff --git a/tests/memcheck/signal2.c b/tests/memcheck/signal2.c
new file mode 100644
index 0000000..a1df705
--- /dev/null
+++ b/tests/memcheck/signal2.c
@@ -0,0 +1,20 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+void sig_hdlr ( int signo )
+{
+   printf ( "caught sig segv\n" );
+   exit(1);
+}
+
+int main ( void )
+{
+   printf ( "installing sig handler\n" );
+   signal(SIGSEGV, sig_hdlr);
+   printf ( "doing bad thing\n" );
+   * (int*) 65536 = 0;
+   printf ( "exited normally ?!\n" );
+   return 0;
+}
diff --git a/tests/memcheck/signal2.stderr.exp b/tests/memcheck/signal2.stderr.exp
new file mode 100644
index 0000000..3ab7302
--- /dev/null
+++ b/tests/memcheck/signal2.stderr.exp
@@ -0,0 +1,12 @@
+
+Invalid write of size 4
+   at 0x........: main (signal2.c:17)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: exit@@GLIBC_2.0 (in /.../tests/memcheck/signal2)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/signal2.stderr.exp.hd b/tests/memcheck/signal2.stderr.exp.hd
new file mode 100644
index 0000000..2cd1fc8
--- /dev/null
+++ b/tests/memcheck/signal2.stderr.exp.hd
@@ -0,0 +1,12 @@
+
+Invalid write of size 4
+   at 0x........: main (signal2.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: exit@@GLIBC_2.0 (in /.../tests/memcheck/signal2)
+   Address 0x........ is not stack'd, malloc'd or free'd
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/signal2.stdout.exp b/tests/memcheck/signal2.stdout.exp
new file mode 100644
index 0000000..3e16af0
--- /dev/null
+++ b/tests/memcheck/signal2.stdout.exp
@@ -0,0 +1,3 @@
+installing sig handler
+doing bad thing
+caught sig segv
diff --git a/tests/memcheck/signal2.vgtest b/tests/memcheck/signal2.vgtest
new file mode 100644
index 0000000..c301370
--- /dev/null
+++ b/tests/memcheck/signal2.vgtest
@@ -0,0 +1 @@
+prog: signal2
diff --git a/tests/memcheck/supp.c b/tests/memcheck/supp.c
new file mode 100644
index 0000000..50c4a81
--- /dev/null
+++ b/tests/memcheck/supp.c
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+
+int
+main ()
+{
+  int x;
+
+  if (x == 0)
+     return 0;
+  else
+     return 1;
+}
diff --git a/tests/memcheck/supp.supp b/tests/memcheck/supp.supp
new file mode 100644
index 0000000..477c6c4
--- /dev/null
+++ b/tests/memcheck/supp.supp
@@ -0,0 +1,6 @@
+{
+  name_of_this_suppression
+  Cond
+  obj:*supp1
+  fun:__libc_start_main
+}
diff --git a/tests/memcheck/supp1.stderr.exp b/tests/memcheck/supp1.stderr.exp
new file mode 100644
index 0000000..6d763a7
--- /dev/null
+++ b/tests/memcheck/supp1.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/supp1.vgtest b/tests/memcheck/supp1.vgtest
new file mode 100644
index 0000000..31130ff
--- /dev/null
+++ b/tests/memcheck/supp1.vgtest
@@ -0,0 +1,2 @@
+vgopts: --suppressions=supp.supp
+prog: supp1
diff --git a/tests/memcheck/supp2.stderr.exp b/tests/memcheck/supp2.stderr.exp
new file mode 100644
index 0000000..b245f04
--- /dev/null
+++ b/tests/memcheck/supp2.stderr.exp
@@ -0,0 +1,11 @@
+
+Conditional jump or move depends on uninitialised value(s)
+   at 0x........: main (supp.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: __libc_start_main@@GLIBC_2.0 (...libc...)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/supp2.vgtest b/tests/memcheck/supp2.vgtest
new file mode 100644
index 0000000..f5200b0
--- /dev/null
+++ b/tests/memcheck/supp2.vgtest
@@ -0,0 +1,2 @@
+vgopts: --suppressions=supp.supp
+prog: supp2
diff --git a/tests/suppfree.c b/tests/memcheck/suppfree.c
similarity index 100%
rename from tests/suppfree.c
rename to tests/memcheck/suppfree.c
diff --git a/tests/memcheck/suppfree.stderr.exp b/tests/memcheck/suppfree.stderr.exp
new file mode 100644
index 0000000..149bf84
--- /dev/null
+++ b/tests/memcheck/suppfree.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid free() / delete / delete[]
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: ddd (suppfree.c:7)
+   by 0x........: ccc (suppfree.c:12)
+   by 0x........: bbb (suppfree.c:17)
+   Address 0x........ is 0 bytes inside a block of size 10 free'd
+   at 0x........: free (vg_clientfuncs.c:...)
+   by 0x........: ddd (suppfree.c:6)
+   by 0x........: ccc (suppfree.c:12)
+   by 0x........: bbb (suppfree.c:17)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 2 frees, 10 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/suppfree.vgtest b/tests/memcheck/suppfree.vgtest
new file mode 100644
index 0000000..bd38f8a
--- /dev/null
+++ b/tests/memcheck/suppfree.vgtest
@@ -0,0 +1 @@
+prog: suppfree
diff --git a/tests/memcheck/trivialleak.c b/tests/memcheck/trivialleak.c
new file mode 100644
index 0000000..f3a8963
--- /dev/null
+++ b/tests/memcheck/trivialleak.c
@@ -0,0 +1,14 @@
+#include <stdlib.h>
+
+static void test()
+  {
+    void* leak;
+    int i;
+    for (i = 0; i < 1000; i++)
+       leak = (void*)malloc( 1 );
+  }
+  int main()
+  {
+    test();
+    return 0;
+  }
diff --git a/tests/memcheck/trivialleak.stderr.exp b/tests/memcheck/trivialleak.stderr.exp
new file mode 100644
index 0000000..12bb84b
--- /dev/null
+++ b/tests/memcheck/trivialleak.stderr.exp
@@ -0,0 +1,26 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 1000 bytes in 1000 blocks.
+malloc/free: 1000 allocs, 0 frees, 1000 bytes allocated.
+For counts of detected errors, rerun with: -v
+searching for pointers to 1000 not-freed blocks.
+checked ... bytes.
+
+definitely lost: 1000 bytes in 1000 blocks.
+possibly lost:   0 bytes in 0 blocks.
+still reachable: 0 bytes in 0 blocks.
+
+1000 bytes in 1000 blocks are definitely lost in loss record 1 of 1
+   at 0x........: malloc (vg_clientfuncs.c:...)
+   by 0x........: test (trivialleak.c:8)
+   by 0x........: main (trivialleak.c:13)
+   by 0x........: __libc_start_main (...libc...)
+
+LEAK SUMMARY:
+   definitely lost: 1000 bytes in 1000 blocks.
+   possibly lost:   0 bytes in 0 blocks.
+   still reachable: 0 bytes in 0 blocks.
+Reachable blocks (those to which a pointer was found) are not shown.
+To see them, rerun with: --show-reachable=yes
+
diff --git a/tests/memcheck/trivialleak.vgtest b/tests/memcheck/trivialleak.vgtest
new file mode 100644
index 0000000..c5b68a8
--- /dev/null
+++ b/tests/memcheck/trivialleak.vgtest
@@ -0,0 +1,3 @@
+vgopts: --leak-check=yes
+prog: trivialleak
+stderr_filter: filter_leak_check_size
diff --git a/tests/memcheck/tronical.S b/tests/memcheck/tronical.S
new file mode 100644
index 0000000..030a2af
--- /dev/null
+++ b/tests/memcheck/tronical.S
@@ -0,0 +1,102 @@
+/*
+
+Assembly derived from the following program compiled with -O2.
+This fools Valgrind, causing it to give a false error.
+
+#include <stdio.h>
+
+struct Foo
+{
+    int a1 : 1;
+    int a2 : 1;
+    int a3 : 1;
+    int a4 : 1;
+    int a5 : 1;
+    int a6 : 1;
+    int a7 : 1;
+    int bleh : 1;
+};
+
+struct Foo* foo;
+
+void set()
+{
+    foo->bleh = 1;
+}
+
+void get()
+{
+    if ( foo->bleh == 0 )
+        printf( "blieb\n" );
+}
+
+int main()
+{
+  foo = malloc(sizeof(struct Foo));
+    set();
+
+    get();
+
+    return 0;
+}
+
+*/
+
+	.file	"tronical.c"
+	.version	"01.01"
+gcc2_compiled.:
+.text
+	.align 4
+.globl set
+	.type	 set,@function
+set:
+	pushl	%ebp
+	movl	foo, %eax
+	orb	$128, (%eax)
+	movl	%esp, %ebp
+	popl	%ebp
+	ret
+.Lfe1:
+	.size	 set,.Lfe1-set
+	.section	.rodata.str1.1,"ams",@progbits,1
+.LC0:
+	.string	"blieb\n"
+.text
+	.align 4
+.globl get
+	.type	 get,@function
+get:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$8, %esp
+	movl	foo, %eax
+	cmpb	$0, (%eax)
+	js	.L4
+	subl	$12, %esp
+	pushl	$.LC0
+	call	printf
+	addl	$16, %esp
+.L4:
+	leave
+	ret
+.Lfe2:
+	.size	 get,.Lfe2-get
+	.align 4
+.globl main
+	.type	 main,@function
+main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$20, %esp
+	pushl	$4
+	call	malloc
+	movl	%eax, foo
+	call	set
+	call	get
+	xorl	%eax, %eax
+	leave
+	ret
+.Lfe3:
+	.size	 main,.Lfe3-main
+	.comm	foo,4,4
+	.ident	"GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)"
diff --git a/tests/memcheck/tronical.stderr.exp b/tests/memcheck/tronical.stderr.exp
new file mode 100644
index 0000000..2c63087
--- /dev/null
+++ b/tests/memcheck/tronical.stderr.exp
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 4 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 4 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/tronical.vgtest b/tests/memcheck/tronical.vgtest
new file mode 100644
index 0000000..97623ad
--- /dev/null
+++ b/tests/memcheck/tronical.vgtest
@@ -0,0 +1 @@
+prog: tronical
diff --git a/tests/memcheck/weirdioctl.c b/tests/memcheck/weirdioctl.c
new file mode 100644
index 0000000..a78de65
--- /dev/null
+++ b/tests/memcheck/weirdioctl.c
@@ -0,0 +1,44 @@
+
+/* A program which sets a readable fd to have a timeout, and therefore
+   needs --weird-hacks=ioctl-VTIME in order to run without
+   blocking. */
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <termio.h>
+
+int main ( void )
+{
+   int c, i;
+   int res;
+         struct termio tty, oldtty;
+
+          /**
+           ** Save the old tty settings, and get rid of echo
+           ** for the new tty settings
+           **/
+          ioctl(0, TCGETA, &oldtty);
+          tty = oldtty;
+          tty.c_lflag    &= ~(ICANON|ECHO|ECHOE|ECHOK|ECHONL);
+          tty.c_cc[VMIN]  = 0;
+          tty.c_cc[VTIME] = 5;
+          res = ioctl(0, TCSETA, &tty);
+	  printf("first ioctl returned %d\n", res);
+
+          /**
+           ** Now do whatever stuff you want non-echoed
+           **/
+          i = 0;
+	  while (i++ < 50) {
+	    c = getchar();
+	    printf("got %d\n", c);
+	  }
+
+          /**
+           ** Now reset the old settings
+           **/
+          res = ioctl(0, TCSETA, &oldtty);
+	  printf("second ioctl returned %d\n", res);
+
+return 0;
+}
diff --git a/tests/memcheck/weirdioctl.stderr.exp b/tests/memcheck/weirdioctl.stderr.exp
new file mode 100644
index 0000000..7d5c9aa
--- /dev/null
+++ b/tests/memcheck/weirdioctl.stderr.exp
@@ -0,0 +1,12 @@
+
+Syscall param ioctl(TCSET{A,AW,AF}) contains uninitialised or unaddressable byte(s)
+   at 0x........: __ioctl (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ioctl@@GLIBC_2.0 (in /.../tests/memcheck/weirdioctl)
+   Address 0x........ is on thread 1's stack
+
+ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/memcheck/weirdioctl.stdout.exp b/tests/memcheck/weirdioctl.stdout.exp
new file mode 100644
index 0000000..bb65b7c
--- /dev/null
+++ b/tests/memcheck/weirdioctl.stdout.exp
@@ -0,0 +1,52 @@
+first ioctl returned -1
+got 118
+got 103
+got 111
+got 112
+got 116
+got 115
+got 58
+got 32
+got 45
+got 45
+got 119
+got 101
+got 105
+got 114
+got 100
+got 45
+got 104
+got 97
+got 99
+got 107
+got 115
+got 61
+got 105
+got 111
+got 99
+got 116
+got 108
+got 45
+got 86
+got 84
+got 73
+got 77
+got 69
+got 10
+got 112
+got 114
+got 111
+got 103
+got 58
+got 32
+got 32
+got 32
+got 119
+got 101
+got 105
+got 114
+got 100
+got 105
+got 111
+got 99
+second ioctl returned -1
diff --git a/tests/memcheck/weirdioctl.vgtest b/tests/memcheck/weirdioctl.vgtest
new file mode 100644
index 0000000..e8d8630
--- /dev/null
+++ b/tests/memcheck/weirdioctl.vgtest
@@ -0,0 +1,3 @@
+vgopts: --weird-hacks=ioctl-VTIME
+prog:   weirdioctl
+args:   < weirdioctl.vgtest
diff --git a/tests/memcmptest.c b/tests/memcmptest.c
deleted file mode 100644
index 56dd85f..0000000
--- a/tests/memcmptest.c
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#include <string.h>
-#include <stdio.h>
-
-char* s1;
-char* s2;
-
-int main ( void )
-{
-  s1 = malloc(10); strcpy(s1,"fooble");
-  s2 = malloc(10); strcpy(s2,"fooble");
-  if (memcmp(s1, s2, 8) != 0)
-    printf("different\n");
-  else
-    printf("same (?!)\n");
-  return 0;
-}
-
-	
diff --git a/tests/memtests.cpp b/tests/memtests.cpp
deleted file mode 100644
index b5f2f3f..0000000
--- a/tests/memtests.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <stdlib.h>
-
-int main()
-{
-  int* fpointer = (int*)malloc(10);
-  delete fpointer; // should give warning
-  fpointer = (int*)malloc(10);
-  delete [] fpointer; // should give warning
-  fpointer = (int*)malloc(10);
-  free (fpointer); // should work!
-
-  int* nvec = new int[10];
-  delete nvec; // should give a warning
-  nvec = new int[10];
-  free (nvec); // should give a warning
-  nvec = new int[10];
-  delete [] nvec; // should work!
-
-  int* n = new int;
-  delete [] n; // should give a warning
-  n = new int;
-  free(n); // should give a warning
-  n = new int;
-  delete n; // should work!
-
-  free(0);
-
-  return 0;
-}
diff --git a/tests/mmaptest.c b/tests/mmaptest.c
deleted file mode 100644
index 4e52b1a..0000000
--- a/tests/mmaptest.c
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-int main()
-{
-    int fd;
-
-    mkdir("dir", 0666);
-    fd = open("dir", O_RDONLY);
-    mmap(NULL, 4711, PROT_READ, MAP_PRIVATE, fd, 0);
-    return 0;
-}
diff --git a/tests/new_override.cpp b/tests/new_override.cpp
deleted file mode 100644
index 8aafd04..0000000
--- a/tests/new_override.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-
-class Test {
-public:
-  int a, b, c, d;
-};
-
-void *operator new(size_t size)
-{
-  void *ret = malloc(size);
-  printf("Here.\n");
-  for (unsigned int i = 0; i < size; i++) ((char *) ret)[i] = 0xFF;
-  return ret;
-}
-
-int main(int argc, char *argv[]) {
-  Test *toto;
-  int i;
-  int j = 0;
-
-  toto = new Test[2];
-
-  for (i = 0; i < 2; i++) {
-    if (toto[i].a) {
-      j++;
-    }
-    printf("%d : %08x %08x %08x %08x\n", i, toto[i].a, toto[i].b, toto[i].c, toto[i].d);
-  }
-}
diff --git a/tests/none/.cvsignore b/tests/none/.cvsignore
new file mode 100644
index 0000000..1b02230
--- /dev/null
+++ b/tests/none/.cvsignore
@@ -0,0 +1,36 @@
+Makefile.in
+Makefile
+bitfield1
+bt_everything
+bt_literal
+coolo_sigaction
+coolo_strlen
+cpuid
+dastest
+floored
+fucomip
+gxx304
+munmap_exe
+pluto
+rcl_assert
+rcrl
+readline1
+sha1_test
+shortpush
+shorts
+smc1
+pth_atfork1
+pth_cancel1
+pth_cancel2
+pth_cvsimple
+pth_mutexspeed
+pth_once
+pth_semaphore1
+pth_simple_mutex
+pth_simple_threads
+pth_specific
+pth_yield
+*.stdout.diff
+*.stderr.diff
+*.stdout.out
+*.stderr.out
diff --git a/tests/none/Makefile.am b/tests/none/Makefile.am
new file mode 100644
index 0000000..32c5a1b
--- /dev/null
+++ b/tests/none/Makefile.am
@@ -0,0 +1,45 @@
+## Process this file with automake to produce Makefile.in
+
+##---------------------------------------------------------------------------
+## These ones all work fine without producing errors in any skin.
+##---------------------------------------------------------------------------
+
+noinst_PROGRAMS = \
+	bitfield1 bt_everything bt_literal coolo_strlen \
+	cpuid dastest floored fork fucomip munmap_exe rcl_assert \
+	rcrl readline1 sha1_test shortpush shorts smc1 \
+	pth_blockedsig \
+	coolo_sigaction gxx304
+
+CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+CXXFLAGS = $(CFLAGS)
+
+# generic C ones
+bitfield1_SOURCES 	= bitfield1.c
+bt_everything_SOURCES 	= bt_everything.c
+bt_literal_SOURCES 	= bt_literal.c
+cpuid_SOURCES 		= cpuid_c.c cpuid_s.s
+coolo_strlen_SOURCES 	= coolo_strlen.c
+dastest_SOURCES 	= dastest_c.c dastest_s.s
+fork_SOURCES 		= fork.c
+floored_SOURCES 	= floored.c
+floored_LDADD 		= -lm
+fucomip_SOURCES 	= fucomip.c
+munmap_exe_SOURCES 	= munmap_exe.c
+rcl_assert_SOURCES 	= rcl_assert.S
+rcrl_SOURCES 		= rcrl.c
+readline1_SOURCES 	= readline1.c
+smc1_SOURCES 		= smc1.c
+sha1_test_SOURCES 	= sha1_test.c
+shortpush_SOURCES 	= shortpush.c
+shorts_SOURCES 		= shorts.c
+
+# pthread C ones
+pth_blockedsig_SOURCES	= pth_blockedsig.c
+pth_blockedsig_LDADD	= -lpthread
+
+# generic C++ ones
+coolo_sigaction_SOURCES	= coolo_sigaction.cpp
+gxx304_SOURCES		= gxx304.cpp
+
+
diff --git a/tests/none/bitfield1.c b/tests/none/bitfield1.c
new file mode 100644
index 0000000..183c7e8
--- /dev/null
+++ b/tests/none/bitfield1.c
@@ -0,0 +1,19 @@
+
+#include <malloc.h>
+
+typedef
+   struct {
+      int          x;
+      unsigned int y:1;
+      int          z;
+   } 
+   Fooble;
+
+int main ( void )
+{
+   Fooble* f = malloc(sizeof(Fooble));
+   f->x = 1;
+   f->z = 1;
+   f->y = (f == (Fooble*)17 ? 1 : 0);
+   return 0;
+}
diff --git a/tests/none/bitfield1.stderr.exp b/tests/none/bitfield1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/bitfield1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/bitfield1.stderr.exp.hd b/tests/none/bitfield1.stderr.exp.hd
new file mode 100644
index 0000000..4f61f32
--- /dev/null
+++ b/tests/none/bitfield1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 12 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/bitfield1.vgtest b/tests/none/bitfield1.vgtest
new file mode 100644
index 0000000..88260d8
--- /dev/null
+++ b/tests/none/bitfield1.vgtest
@@ -0,0 +1 @@
+prog: bitfield1
diff --git a/tests/bt_everything.c b/tests/none/bt_everything.c
similarity index 100%
rename from tests/bt_everything.c
rename to tests/none/bt_everything.c
diff --git a/tests/none/bt_everything.stderr.exp b/tests/none/bt_everything.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/bt_everything.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/bt_everything.stderr.exp.hd b/tests/none/bt_everything.stderr.exp.hd
new file mode 100644
index 0000000..3a4f79b
--- /dev/null
+++ b/tests/none/bt_everything.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 200 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/bt_everything.stdout.exp b/tests/none/bt_everything.stdout.exp
new file mode 100644
index 0000000..fd67221
--- /dev/null
+++ b/tests/none/bt_everything.stdout.exp
@@ -0,0 +1,2 @@
+MEM-L: final res 0xd2bfea53, carrydep 0x5b80deee
+REG-L: final res 0x605d78ff, carrydep 0x7c0dc86a
diff --git a/tests/none/bt_everything.vgtest b/tests/none/bt_everything.vgtest
new file mode 100644
index 0000000..711210d
--- /dev/null
+++ b/tests/none/bt_everything.vgtest
@@ -0,0 +1 @@
+prog: bt_everything
diff --git a/tests/bt_literal.c b/tests/none/bt_literal.c
similarity index 100%
rename from tests/bt_literal.c
rename to tests/none/bt_literal.c
diff --git a/tests/none/bt_literal.stderr.exp b/tests/none/bt_literal.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/bt_literal.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/bt_literal.stderr.exp.hd b/tests/none/bt_literal.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/bt_literal.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/bt_literal.stdout.exp b/tests/none/bt_literal.stdout.exp
new file mode 100644
index 0000000..c9bb28d
--- /dev/null
+++ b/tests/none/bt_literal.stdout.exp
@@ -0,0 +1,16 @@
+0x0 -> 0x a 0x a 0x a
+0x1 -> 0x1b 0x1b 0x1b
+0x2 -> 0x2a 0x2a 0x2a
+0x3 -> 0x3b 0x3b 0x3b
+0x4 -> 0x4a 0x4a 0x4a
+0x5 -> 0x5b 0x5b 0x5b
+0x6 -> 0x6a 0x6a 0x6a
+0x7 -> 0x7b 0x7b 0x7b
+0x8 -> 0x82 0x82 0x82
+0x9 -> 0x93 0x93 0x93
+0xa -> 0xa2 0xa2 0xa2
+0xb -> 0xb3 0xb3 0xb3
+0xc -> 0xc2 0xc2 0xc2
+0xd -> 0xd3 0xd3 0xd3
+0xe -> 0xe2 0xe2 0xe2
+0xf -> 0xf3 0xf3 0xf3
diff --git a/tests/none/bt_literal.vgtest b/tests/none/bt_literal.vgtest
new file mode 100644
index 0000000..9c06c64
--- /dev/null
+++ b/tests/none/bt_literal.vgtest
@@ -0,0 +1 @@
+prog: bt_literal
diff --git a/tests/coolo_sigaction.cpp b/tests/none/coolo_sigaction.cpp
similarity index 100%
rename from tests/coolo_sigaction.cpp
rename to tests/none/coolo_sigaction.cpp
diff --git a/tests/none/coolo_sigaction.stderr.exp b/tests/none/coolo_sigaction.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/coolo_sigaction.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/coolo_sigaction.stderr.exp.hd b/tests/none/coolo_sigaction.stderr.exp.hd
new file mode 100644
index 0000000..564abd8
--- /dev/null
+++ b/tests/none/coolo_sigaction.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 372 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/coolo_sigaction.stdout.exp b/tests/none/coolo_sigaction.stdout.exp
new file mode 100644
index 0000000..652c76e
--- /dev/null
+++ b/tests/none/coolo_sigaction.stdout.exp
@@ -0,0 +1 @@
+handled 17
diff --git a/tests/none/coolo_sigaction.vgtest b/tests/none/coolo_sigaction.vgtest
new file mode 100644
index 0000000..4ee1d82
--- /dev/null
+++ b/tests/none/coolo_sigaction.vgtest
@@ -0,0 +1 @@
+prog: coolo_sigaction
diff --git a/tests/coolo_strlen.c b/tests/none/coolo_strlen.c
similarity index 100%
rename from tests/coolo_strlen.c
rename to tests/none/coolo_strlen.c
diff --git a/tests/none/coolo_strlen.stderr.exp b/tests/none/coolo_strlen.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/coolo_strlen.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/coolo_strlen.stderr.exp.hd b/tests/none/coolo_strlen.stderr.exp.hd
new file mode 100644
index 0000000..5164e75
--- /dev/null
+++ b/tests/none/coolo_strlen.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 33 bytes in 1 blocks.
+malloc/free: 1 allocs, 0 frees, 33 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/coolo_strlen.vgtest b/tests/none/coolo_strlen.vgtest
new file mode 100644
index 0000000..b49ad79
--- /dev/null
+++ b/tests/none/coolo_strlen.vgtest
@@ -0,0 +1 @@
+prog: coolo_strlen
diff --git a/tests/none/cpuid.stderr.exp b/tests/none/cpuid.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/cpuid.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/cpuid.stderr.exp.hd b/tests/none/cpuid.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/cpuid.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/cpuid.stdout.exp b/tests/none/cpuid.stdout.exp
new file mode 100644
index 0000000..ab872bf
--- /dev/null
+++ b/tests/none/cpuid.stdout.exp
@@ -0,0 +1,2 @@
+cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69
+cpuid words (1): 0x52b 0x0 0x0 0x1bf
diff --git a/tests/none/cpuid.vgtest b/tests/none/cpuid.vgtest
new file mode 100644
index 0000000..36a2db0
--- /dev/null
+++ b/tests/none/cpuid.vgtest
@@ -0,0 +1 @@
+prog: cpuid
diff --git a/tests/cpuid_c.c b/tests/none/cpuid_c.c
similarity index 100%
rename from tests/cpuid_c.c
rename to tests/none/cpuid_c.c
diff --git a/tests/cpuid_s.s b/tests/none/cpuid_s.s
similarity index 100%
rename from tests/cpuid_s.s
rename to tests/none/cpuid_s.s
diff --git a/tests/none/dastest.stderr.exp b/tests/none/dastest.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/dastest.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/dastest.stderr.exp.hd b/tests/none/dastest.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/dastest.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/dastest.stdout.exp b/tests/none/dastest.stdout.exp
new file mode 100644
index 0000000..a122b1e
--- /dev/null
+++ b/tests/none/dastest.stdout.exp
@@ -0,0 +1,2 @@
+dastest: x = 49
+dastest: das(x) = 49
diff --git a/tests/none/dastest.vgtest b/tests/none/dastest.vgtest
new file mode 100644
index 0000000..91f9033
--- /dev/null
+++ b/tests/none/dastest.vgtest
@@ -0,0 +1 @@
+prog: dastest
diff --git a/tests/dastest_c.c b/tests/none/dastest_c.c
similarity index 100%
rename from tests/dastest_c.c
rename to tests/none/dastest_c.c
diff --git a/tests/dastest.s b/tests/none/dastest_s.s
similarity index 100%
rename from tests/dastest.s
rename to tests/none/dastest_s.s
diff --git a/tests/none/filter_stderr b/tests/none/filter_stderr
new file mode 100755
index 0000000..07d877d
--- /dev/null
+++ b/tests/none/filter_stderr
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+../filter_stderr_basic
diff --git a/tests/none/floored.c b/tests/none/floored.c
new file mode 100644
index 0000000..678a4f5
--- /dev/null
+++ b/tests/none/floored.c
@@ -0,0 +1,18 @@
+
+#include <math.h>
+#include <stdio.h>
+
+int xToI ( );
+
+int main ( void )
+{
+   printf ( "the answer is %d\n", xToI () );
+   return 0;
+}
+
+
+int xToI()
+{
+    return (int)floor(2.90) + 1;
+}
+
diff --git a/tests/none/floored.stderr.exp b/tests/none/floored.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/floored.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/floored.stderr.exp.hd b/tests/none/floored.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/floored.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/floored.stdout.exp b/tests/none/floored.stdout.exp
new file mode 100644
index 0000000..a8bb84d
--- /dev/null
+++ b/tests/none/floored.stdout.exp
@@ -0,0 +1 @@
+the answer is 3
diff --git a/tests/none/floored.vgtest b/tests/none/floored.vgtest
new file mode 100644
index 0000000..2e22206
--- /dev/null
+++ b/tests/none/floored.vgtest
@@ -0,0 +1 @@
+prog: floored
diff --git a/tests/none/fork.c b/tests/none/fork.c
new file mode 100644
index 0000000..2a987d2
--- /dev/null
+++ b/tests/none/fork.c
@@ -0,0 +1,15 @@
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <stdio.h>
+
+int main(void)
+{
+  pid_t pid;
+
+  pid = fork ();
+
+  printf("my pid is %s\n", pid==0 ? "zero" : "non-zero");
+
+  return 0;
+}
diff --git a/tests/none/fork.stderr.exp b/tests/none/fork.stderr.exp
new file mode 100644
index 0000000..b28b04f
--- /dev/null
+++ b/tests/none/fork.stderr.exp
@@ -0,0 +1,3 @@
+
+
+
diff --git a/tests/none/fork.stderr.exp.hd b/tests/none/fork.stderr.exp.hd
new file mode 100644
index 0000000..f1512e4
--- /dev/null
+++ b/tests/none/fork.stderr.exp.hd
@@ -0,0 +1,13 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/fork.stdout.exp b/tests/none/fork.stdout.exp
new file mode 100644
index 0000000..d3f09d4
--- /dev/null
+++ b/tests/none/fork.stdout.exp
@@ -0,0 +1,2 @@
+my pid is non-zero
+my pid is zero
diff --git a/tests/none/fork.vgtest b/tests/none/fork.vgtest
new file mode 100644
index 0000000..0de247f
--- /dev/null
+++ b/tests/none/fork.vgtest
@@ -0,0 +1 @@
+prog: fork
diff --git a/tests/fucomip.c b/tests/none/fucomip.c
similarity index 100%
rename from tests/fucomip.c
rename to tests/none/fucomip.c
diff --git a/tests/none/fucomip.stderr.exp b/tests/none/fucomip.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/fucomip.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/fucomip.stderr.exp.hd b/tests/none/fucomip.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/fucomip.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/fucomip.vgtest b/tests/none/fucomip.vgtest
new file mode 100644
index 0000000..6755f13
--- /dev/null
+++ b/tests/none/fucomip.vgtest
@@ -0,0 +1 @@
+prog: fucomip
diff --git a/tests/gxx304.cpp b/tests/none/gxx304.cpp
similarity index 100%
rename from tests/gxx304.cpp
rename to tests/none/gxx304.cpp
diff --git a/tests/none/gxx304.stderr.exp b/tests/none/gxx304.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/gxx304.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/gxx304.stderr.exp.hd b/tests/none/gxx304.stderr.exp.hd
new file mode 100644
index 0000000..4944bba
--- /dev/null
+++ b/tests/none/gxx304.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 24 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/gxx304.vgtest b/tests/none/gxx304.vgtest
new file mode 100644
index 0000000..202a58a
--- /dev/null
+++ b/tests/none/gxx304.vgtest
@@ -0,0 +1 @@
+prog: gxx304
diff --git a/tests/none/munmap_exe.c b/tests/none/munmap_exe.c
new file mode 100644
index 0000000..e17d885
--- /dev/null
+++ b/tests/none/munmap_exe.c
@@ -0,0 +1,24 @@
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Point of this is that the fd of an PROT_EXEC segment is -1, so Valgrind
+   shouldn't add it to its list of exe segs, and thus it won't be discarded
+   upon the munmap() (so no "discard" message). */
+
+int main()
+{
+    void* m;
+    
+    m = mmap(NULL, 100, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+    if (m == (void*)-1) {
+       fprintf(stderr, "error mmapping\n");
+       exit(1);
+    }
+    
+    munmap(m, 100);
+
+    return 0;
+}
diff --git a/tests/none/munmap_exe.stderr.exp b/tests/none/munmap_exe.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/munmap_exe.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/munmap_exe.stderr.exp.hd b/tests/none/munmap_exe.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/munmap_exe.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/munmap_exe.vgtest b/tests/none/munmap_exe.vgtest
new file mode 100644
index 0000000..8409a03
--- /dev/null
+++ b/tests/none/munmap_exe.vgtest
@@ -0,0 +1 @@
+prog: munmap_exe
diff --git a/tests/none/pth_blockedsig.c b/tests/none/pth_blockedsig.c
new file mode 100644
index 0000000..65fe7ba
--- /dev/null
+++ b/tests/none/pth_blockedsig.c
@@ -0,0 +1,66 @@
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <pthread.h>
+
+static void sig_usr1(int);
+
+static pthread_t main_thread;
+
+void *
+child_main(void *no_args)
+{
+//  int i;
+  
+// Only do it once, to shorten test --njn
+//  for (i = 0; i < 5; ++i)
+//    {
+      sleep (1);
+      fprintf (stdout, "thread %ld sending SIGUSR1 to thread %ld\n",
+               pthread_self (), main_thread);
+      if (pthread_kill (main_thread, SIGUSR1) != 0)
+        fprintf (stderr, "error doing pthread_kill\n"); 
+//    }
+
+  return no_args;
+}
+
+int
+main(void)
+{
+  struct sigaction sigact;
+  sigset_t newmask, oldmask;
+  pthread_t child;
+
+  memset(&newmask, 0, sizeof newmask);
+  sigemptyset (&newmask);
+  sigaddset (&newmask, SIGUSR1);
+
+  if (pthread_sigmask (SIG_BLOCK, &newmask, &oldmask) != 0)
+    fprintf (stderr, "SIG_BLOCK error");
+  
+  memset (&sigact, 0, sizeof sigact);
+  sigact.sa_handler = sig_usr1;
+  if (sigaction(SIGUSR1, &sigact, NULL) != 0)
+    fprintf (stderr, "signal(SIGINT) error");
+  
+  main_thread = pthread_self ();
+  if (pthread_create (&child, NULL, child_main, NULL) != 0)
+    fprintf (stderr, "error creating thread");
+
+  pthread_join (child, NULL);
+  
+  exit(0);
+}
+
+static void
+sig_usr1 (int signo)
+{
+  fprintf (stderr, "SHOULD NOT BE HERE (SIGUSR1)!!!!\n");
+  return;
+}
+
+
diff --git a/tests/none/pth_blockedsig.stderr.exp b/tests/none/pth_blockedsig.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/pth_blockedsig.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/pth_blockedsig.stderr.exp.hd b/tests/none/pth_blockedsig.stderr.exp.hd
new file mode 100644
index 0000000..dc3cc2b
--- /dev/null
+++ b/tests/none/pth_blockedsig.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 1 allocs, 1 frees, 12 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/pth_blockedsig.stdout.exp b/tests/none/pth_blockedsig.stdout.exp
new file mode 100644
index 0000000..be7b259
--- /dev/null
+++ b/tests/none/pth_blockedsig.stdout.exp
@@ -0,0 +1 @@
+thread 2 sending SIGUSR1 to thread 1
diff --git a/tests/none/pth_blockedsig.vgtest b/tests/none/pth_blockedsig.vgtest
new file mode 100644
index 0000000..4532980
--- /dev/null
+++ b/tests/none/pth_blockedsig.vgtest
@@ -0,0 +1 @@
+prog: pth_blockedsig
diff --git a/tests/pth_specific.c b/tests/none/pth_specific.c
similarity index 100%
rename from tests/pth_specific.c
rename to tests/none/pth_specific.c
diff --git a/tests/rcl_assert.s b/tests/none/rcl_assert.S
similarity index 100%
rename from tests/rcl_assert.s
rename to tests/none/rcl_assert.S
diff --git a/tests/none/rcl_assert.stderr.exp b/tests/none/rcl_assert.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/rcl_assert.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/rcl_assert.stderr.exp.hd b/tests/none/rcl_assert.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/rcl_assert.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/rcl_assert.vgtest b/tests/none/rcl_assert.vgtest
new file mode 100644
index 0000000..0355bfd
--- /dev/null
+++ b/tests/none/rcl_assert.vgtest
@@ -0,0 +1 @@
+prog: rcl_assert
diff --git a/tests/rcrl.c b/tests/none/rcrl.c
similarity index 100%
rename from tests/rcrl.c
rename to tests/none/rcrl.c
diff --git a/tests/none/rcrl.stderr.exp b/tests/none/rcrl.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/rcrl.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/rcrl.stderr.exp.hd b/tests/none/rcrl.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/rcrl.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/rcrl.stdout.exp b/tests/none/rcrl.stdout.exp
new file mode 100644
index 0000000..355f303
--- /dev/null
+++ b/tests/none/rcrl.stdout.exp
@@ -0,0 +1 @@
+x = 0.999939
diff --git a/tests/none/rcrl.vgtest b/tests/none/rcrl.vgtest
new file mode 100644
index 0000000..ea1b8ba
--- /dev/null
+++ b/tests/none/rcrl.vgtest
@@ -0,0 +1 @@
+prog: rcrl
diff --git a/tests/none/readline1.c b/tests/none/readline1.c
new file mode 100644
index 0000000..63c4b89
--- /dev/null
+++ b/tests/none/readline1.c
@@ -0,0 +1,27 @@
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+
+int rl_insert ( int, int );
+
+int main ( void )
+{
+   rl_insert(1, 'z');
+
+   return 0;
+}
+
+int zzzstrlen ( char* str )
+{
+   if (str[1] == 0) return 2; else return 10;
+}
+
+int rl_insert ( int count, int c )
+{
+   char str[2];
+   str[1] = 0;
+   str[0] = c;
+   printf("HERE strlen  is %d\n", zzzstrlen(str));
+   return 0;
+}
diff --git a/tests/none/readline1.stderr.exp b/tests/none/readline1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/readline1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/readline1.stderr.exp.hd b/tests/none/readline1.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/readline1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/readline1.stdout.exp b/tests/none/readline1.stdout.exp
new file mode 100644
index 0000000..bee59c7
--- /dev/null
+++ b/tests/none/readline1.stdout.exp
@@ -0,0 +1 @@
+HERE strlen  is 2
diff --git a/tests/none/readline1.vgtest b/tests/none/readline1.vgtest
new file mode 100644
index 0000000..cbfd47a
--- /dev/null
+++ b/tests/none/readline1.vgtest
@@ -0,0 +1 @@
+prog: readline1
diff --git a/tests/sha1.test.c b/tests/none/sha1_test.c
similarity index 100%
rename from tests/sha1.test.c
rename to tests/none/sha1_test.c
diff --git a/tests/none/sha1_test.stderr.exp b/tests/none/sha1_test.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/sha1_test.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/sha1_test.stderr.exp.hd b/tests/none/sha1_test.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/sha1_test.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/sha1_test.vgtest b/tests/none/sha1_test.vgtest
new file mode 100644
index 0000000..eb20557
--- /dev/null
+++ b/tests/none/sha1_test.vgtest
@@ -0,0 +1 @@
+prog: sha1_test
diff --git a/tests/shortpush.c b/tests/none/shortpush.c
similarity index 100%
rename from tests/shortpush.c
rename to tests/none/shortpush.c
diff --git a/tests/none/shortpush.stderr.exp b/tests/none/shortpush.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/shortpush.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/shortpush.stderr.exp.hd b/tests/none/shortpush.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/shortpush.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/shortpush.vgtest b/tests/none/shortpush.vgtest
new file mode 100644
index 0000000..7fc35ef
--- /dev/null
+++ b/tests/none/shortpush.vgtest
@@ -0,0 +1 @@
+prog: shortpush
diff --git a/tests/shorts.c b/tests/none/shorts.c
similarity index 100%
rename from tests/shorts.c
rename to tests/none/shorts.c
diff --git a/tests/none/shorts.stderr.exp b/tests/none/shorts.stderr.exp
new file mode 100644
index 0000000..328e795
--- /dev/null
+++ b/tests/none/shorts.stderr.exp
@@ -0,0 +1,4 @@
+
+case2
+case4
+
diff --git a/tests/none/shorts.stderr.exp.hd b/tests/none/shorts.stderr.exp.hd
new file mode 100644
index 0000000..a1f5070
--- /dev/null
+++ b/tests/none/shorts.stderr.exp.hd
@@ -0,0 +1,9 @@
+
+case2
+case4
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/shorts.vgtest b/tests/none/shorts.vgtest
new file mode 100644
index 0000000..af03ee0
--- /dev/null
+++ b/tests/none/shorts.vgtest
@@ -0,0 +1 @@
+prog: shorts
diff --git a/tests/none/smc1.c b/tests/none/smc1.c
new file mode 100644
index 0000000..0b0ebdf
--- /dev/null
+++ b/tests/none/smc1.c
@@ -0,0 +1,73 @@
+
+/* Test Heimdall's ability to spot writes to code which has been
+   translated, and discard the out-of-date translations.
+
+   CORRECT output is
+
+      in p 0
+      in q 1
+      in p 2
+      in q 3
+      in p 4
+      in q 5
+      in p 6
+      in q 7
+      in p 8
+      in q 9
+
+  WRONG output (if you fail to spot code-writes to code[0 .. 4]) is
+
+      in p 0
+      in p 1
+      in p 2
+      in p 3
+      in p 4
+      in p 5
+      in p 6
+      in p 7
+      in p 8
+      in p 9
+*/
+
+#include <stdio.h>
+
+typedef unsigned int Addr;
+typedef unsigned char UChar;
+
+void q ( int n )
+{
+   printf("in q %d\n", n);
+}
+
+void p ( int n )
+{
+   printf("in p %d\n", n);
+}
+
+UChar code[100];
+
+/* Make `code' be JMP-32 dest */
+void set_dest ( Addr dest )
+{
+   unsigned int delta;
+   delta = dest - ((Addr)(&code[0]));
+   delta -= 5;
+   
+   code[0] = 0xE9;   /* JMP d32 */
+   code[1] = (delta & 0xFF);
+   code[2] = ((delta >> 8) & 0xFF);
+   code[3] = ((delta >> 16) & 0xFF);
+   code[4] = ((delta >> 24) & 0xFF);
+}
+
+int main ( void )
+{
+   int i;
+   for (i = 0; i < 10; i += 2) {
+      set_dest ( (Addr)&p );
+      (  (void (*)(int)) (&code[0])  ) (i);
+      set_dest ( (Addr)&q );
+      (  (void (*)(int)) (&code[0])  ) (i+1);
+   }
+   return 0;
+}
diff --git a/tests/none/smc1.stderr.exp b/tests/none/smc1.stderr.exp
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/tests/none/smc1.stderr.exp
@@ -0,0 +1,2 @@
+
+
diff --git a/tests/none/smc1.stderr.exp.hd b/tests/none/smc1.stderr.exp.hd
new file mode 100644
index 0000000..c4aa6f0
--- /dev/null
+++ b/tests/none/smc1.stderr.exp.hd
@@ -0,0 +1,7 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+malloc/free: in use at exit: 0 bytes in 0 blocks.
+malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
+For a detailed leak analysis,  rerun with: --leak-check=yes
+For counts of detected errors, rerun with: -v
diff --git a/tests/none/smc1.stdout.exp b/tests/none/smc1.stdout.exp
new file mode 100644
index 0000000..d7fc032
--- /dev/null
+++ b/tests/none/smc1.stdout.exp
@@ -0,0 +1,10 @@
+in p 0
+in p 1
+in p 2
+in p 3
+in p 4
+in p 5
+in p 6
+in p 7
+in p 8
+in p 9
diff --git a/tests/none/smc1.vgtest b/tests/none/smc1.vgtest
new file mode 100644
index 0000000..e2ef32c
--- /dev/null
+++ b/tests/none/smc1.vgtest
@@ -0,0 +1 @@
+prog: smc1
diff --git a/tests/pth_cancel2.c b/tests/pth_cancel2.c
deleted file mode 100644
index 688dded..0000000
--- a/tests/pth_cancel2.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/********************************************************
- * An example source module to accompany...
- *
- * "Using POSIX Threads: Programming with Pthreads"
- *     by Brad nichols, Dick Buttlar, Jackie Farrell
- *     O'Reilly & Associates, Inc.
- *
- ********************************************************
- * async_safe --
- *
- * Example showing macro wrappers for calling non-async
- * safe routines when the caller has asynchronous 
- * cancellation turned on
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <pthread.h>
-
-
-#define async_cancel_safe_read(fd,buf,amt) \
-   { \
-      int oldtype; \
-      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
-      if (read(fd,buf,amt) < 0) \
-         perror("read"),exit(1); \
-      pthread_setcanceltype(oldtype,NULL); \
-      pthread_testcancel(); \
-   } 
-   
-
-#define async_cancel_safe_write(fd,buf,amt) \
-   { \
-      int oldtype; \
-      pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &oldtype); \
-      if (write(fd,buf,amt) < 0) \
-         perror("write"), exit(1); \
-      pthread_setcanceltype(oldtype,NULL); \
-      pthread_testcancel(); \
-   }
-
-
-static int fd;
-   
-void *io(void *arg)
-{
-   int *fd=(int *)arg; 
-   char buf[20]="String";
-   int amt=20;
-
-   for (;;) {
-      async_cancel_safe_write(*fd,buf,amt);
-      async_cancel_safe_read(*fd,buf,amt);
-   }
-   return(NULL);
-}
-
-void *killer(void *arg)
-{ 
-   pthread_t * target = (pthread_t *)arg;
-   sleep(1);
-   pthread_cancel(*target);
-   return(NULL);
-}
-
-extern int
-main(void)
-{
-   pthread_t io_thread, killer_thread;   
-
-   extern void *io(void *);
-   extern void *killer(void  *);
-
-   if ((fd = open(".ktemp",O_CREAT | O_RDWR, 0666)) < 0)
-      perror("open"), exit(1);
-
-   pthread_create(&io_thread, 
-		  NULL,
-		  io,
-		  (void *)&fd);
-   pthread_create(&killer_thread,
-		  NULL,
-		  killer,
-		  (void *)&io_thread);
-
-   pthread_join(io_thread, NULL);
-
-   pthread_join(killer_thread,NULL);
-
-   if ((close(fd)) < 0)
-     perror("close"),exit(1);
-   if ((unlink(".ktemp")) < 0)
-     perror("unlink"),exit(1);
-
-   return 0;
-}
diff --git a/tests/pth_cvsimple.c b/tests/pth_cvsimple.c
deleted file mode 100644
index ba1101b..0000000
--- a/tests/pth_cvsimple.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/********************************************************
- * An example source module to accompany...
- *
- * "Using POSIX Threads: Programming with Pthreads"
- *     by Brad nichols, Dick Buttlar, Jackie Farrell
- *     O'Reilly & Associates, Inc.
- *
- ********************************************************
- *
- * cvsimple.c
- *
- * Demonstrates pthread cancellation.
- *
- */
-
-#include <stdio.h>
-#include <pthread.h>
-
-#define NUM_THREADS  3
-#define TCOUNT 10
-#define COUNT_THRES 12
-
-int     count = 0;
-int     thread_ids[3] = {0,1,2};
-pthread_mutex_t count_lock=PTHREAD_MUTEX_INITIALIZER; 
-pthread_cond_t count_hit_threshold=PTHREAD_COND_INITIALIZER; 
-
-void *inc_count(void *idp)
-{
-  int i=0, save_state, save_type;
-  int *my_id = idp;
-
-  for (i=0; i<TCOUNT; i++) {
-    pthread_mutex_lock(&count_lock);
-    count++;
-    printf("inc_counter(): thread %d, count = %d, unlocking mutex\n", 
-	   *my_id, count);
-    if (count == COUNT_THRES) {
-      printf("inc_count(): Thread %d, count %d\n", *my_id, count);
-      pthread_cond_signal(&count_hit_threshold);
-    }
-    pthread_mutex_unlock(&count_lock);
-  }
-  
-  return(NULL);
-}
-
-void *watch_count(void *idp)
-{
-  int i=0, save_state, save_type;
-  int *my_id = idp;
-
-  printf("watch_count(): thread %d\n", *my_id);
-  fflush(stdout);
-  pthread_mutex_lock(&count_lock);
-
-  while (count < COUNT_THRES) {
-    pthread_cond_wait(&count_hit_threshold, &count_lock);
-    printf("watch_count(): thread %d, count %d\n", *my_id, count);
-  }
-
-  pthread_mutex_unlock(&count_lock);
-  
-  return(NULL);
-}
-
-extern int
-main(void)
-{
-  int       i;
-  pthread_t threads[3];
-
-  pthread_create(&threads[0], NULL, inc_count, (void *)&thread_ids[0]);
-  pthread_create(&threads[1], NULL, inc_count, (void *)&thread_ids[1]);
-  pthread_create(&threads[2], NULL, watch_count, (void *)&thread_ids[2]);
-
-  for (i = 0; i < NUM_THREADS; i++) {
-    pthread_join(threads[i], NULL);
-  }
-
-  return 0;
-}
-
-
diff --git a/tests/pth_once.c b/tests/pth_once.c
deleted file mode 100644
index 0d795c5..0000000
--- a/tests/pth_once.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/********************************************************
- * An example source module to accompany...
- *
- * "Using POSIX Threads: Programming with Pthreads"
- *     by Brad nichols, Dick Buttlar, Jackie Farrell
- *     O'Reilly & Associates, Inc.
- *
- ********************************************************
- * once_exam.c
- *
- * An example of using the pthreads_once() call to execute an
- * initialization procedure.
- *
- * A program spawns multiple threads and each one tries to
- * execute the routine welcome() using the once call. Only
- * the first thread into the once routine will actually
- * execute welcome().
- *
- * The program's main thread synchronizes its exit with the
- * exit of the threads using the pthread_join() operation.
- *
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/types.h>
-
-#include <pthread.h>
-
-#define  NUM_THREADS   10
-
-static pthread_once_t welcome_once_block = PTHREAD_ONCE_INIT;
-
-void welcome(void)
-{
-	printf("welcome: Welcome\n");
-}
-
-void *identify_yourself(void *arg)
-{
-        int *pid=(int *)arg;
-	int rtn;
-
-	if ((rtn = pthread_once(&welcome_once_block,
-			        welcome)) != 0) {
-		fprintf(stderr, "pthread_once failed with %d",rtn);
-		pthread_exit((void *)NULL);
-	}
-	printf("identify_yourself: Hi, I'm thread # %d\n",*pid);
-        return(NULL);
-}
-
-extern int
-main(void)
-{
-	int             *id_arg, thread_num, rtn;
-	pthread_t       threads[NUM_THREADS];
-
-	id_arg = (int *)malloc(NUM_THREADS*sizeof(int));
-
-	for (thread_num = 0; thread_num < NUM_THREADS; (thread_num)++) {
-
-		id_arg[thread_num] = thread_num;
-
-		if (( rtn = pthread_create(&threads[thread_num], 
-					   NULL,
-					   identify_yourself,
-					   (void *) &(id_arg[thread_num]))) 
-		    != 0) {
-		  fprintf(stderr, "pthread_create failed with %d",rtn);
-		  exit(1);
-		}
-	} 	
-
-	for (thread_num = 0; thread_num < NUM_THREADS; thread_num++) {
-	  pthread_join(threads[thread_num], NULL);
-	  printf("main: joined to thread %d\n", thread_num);
-	}
-	printf("main: Goodbye\n");
-}
diff --git a/tests/readline1.c b/tests/readline1.c
deleted file mode 100644
index 0d2c1a5..0000000
--- a/tests/readline1.c
+++ /dev/null
@@ -1,25 +0,0 @@
-
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-
-int rl_insert ( int, int );
-
-void main ( void )
-{
-   rl_insert(1, 'z');
-}
-
-int zzzstrlen ( char* str )
-{
-   if (str[1] == 0) return 2; else return 10;
-}
-
-int rl_insert ( int count, int c )
-{
-   char str[2];
-   str[1] = 0;
-   str[0] = c;
-   printf("HERE strlen  is %d\n", zzzstrlen(str));
-   return 0;
-}
diff --git a/tests/sigkill.c b/tests/sigkill.c
deleted file mode 100644
index c4c7b77..0000000
--- a/tests/sigkill.c
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#include <errno.h>
-#include <stdio.h>
-#include <signal.h>
-#include <stdlib.h>
-
-static void
-abend (int sig)
-{
-  printf ("Abended on signal %d\n", sig);
-  exit (2);
-}
-
-int
-main (void)
-{
-  struct sigaction  sa;
-
-  int i;
-  for (i = 1; i <= 64; i++) {
-     sa.sa_flags   = 0;
-     sigemptyset( &sa.sa_mask );
-     sa.sa_handler = abend;
-     errno = 0;
-     fprintf(stderr, "setting signal %d: ", i);
-     sigaction (i /*SIGKILL*/, &sa, NULL);
-     perror ("");
-     errno = 0;
-     fprintf(stderr, "getting signal %d: ", i);
-     sigaction (i /*SIGKILL*/, NULL, &sa);
-     perror ("");
-     fprintf(stderr, "\n");
-  }
-  return 0;
-}
diff --git a/tests/signal2.c b/tests/signal2.c
deleted file mode 100644
index f04b1b4..0000000
--- a/tests/signal2.c
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#include <stdio.h>
-#include <signal.h>
-
-void sig_hdlr ( int signo )
-{
-   printf ( "caught sig segv\n" );
-   exit(1);
-}
-
-int main ( void )
-{
-   printf ( "installing sig handler\n" );
-   signal(SIGSEGV, sig_hdlr);
-   printf ( "doing bad thing\n" );
-   * (int*) 65536 = 0;
-   printf ( "exited normally ?!\n" );
-   return 0;
-}
diff --git a/tests/smc1.c b/tests/smc1.c
deleted file mode 100644
index 398f88d..0000000
--- a/tests/smc1.c
+++ /dev/null
@@ -1,72 +0,0 @@
-
-/* Test Heimdall's ability to spot writes to code which has been
-   translated, and discard the out-of-date translations.
-
-   CORRECT output is
-
-      in p 0
-      in q 1
-      in p 2
-      in q 3
-      in p 4
-      in q 5
-      in p 6
-      in q 7
-      in p 8
-      in q 9
-
-  WRONG output (if you fail to spot code-writes to code[0 .. 4]) is
-
-      in p 0
-      in p 1
-      in p 2
-      in p 3
-      in p 4
-      in p 5
-      in p 6
-      in p 7
-      in p 8
-      in p 9
-*/
-
-#include <stdio.h>
-
-typedef unsigned int Addr;
-typedef unsigned char UChar;
-
-void q ( int n )
-{
-   printf("in q %d\n", n);
-}
-
-void p ( int n )
-{
-   printf("in p %d\n", n);
-}
-
-UChar code[100];
-
-/* Make `code' be JMP-32 dest */
-void set_dest ( Addr dest )
-{
-   unsigned int delta;
-   delta = dest - ((Addr)(&code[0]));
-   delta -= 5;
-   
-   code[0] = 0xE9;   /* JMP d32 */
-   code[1] = (delta & 0xFF);
-   code[2] = ((delta >> 8) & 0xFF);
-   code[3] = ((delta >> 16) & 0xFF);
-   code[4] = ((delta >> 24) & 0xFF);
-}
-
-void main ( void )
-{
-   int i;
-   for (i = 0; i < 10; i += 2) {
-      set_dest ( (Addr)&p );
-      (  (void (*)(int)) (&code[0])  ) (i);
-      set_dest ( (Addr)&q );
-      (  (void (*)(int)) (&code[0])  ) (i+1);
-   }
-}
diff --git a/tests/stpcpy.c b/tests/stpcpy.c
deleted file mode 100644
index 54464be..0000000
--- a/tests/stpcpy.c
+++ /dev/null
@@ -1,51 +0,0 @@
-
-#if 0
-/*
-Subject:  valgrind glibc suppression
-   Date:  Fri, 22 Mar 2002 23:54:44 -0500 (EST)
-   From:  Alex Larsson <alexl@redhat.com>
-     To:  jseward@acm.org
-
-
-Hi.
-
-I'm getting a lot of errors in __stpcpy(). I think this may be a bug in 
-glibc. I didn't analyze the stpcpy asm in detail, so it might still be a 
-valgrind bug, but it's probably a glibc bug.
-
-Here is a test case:
-*/
-#endif
-
-#include <string.h>
-#include <stdlib.h>
-
-int main()
-{
-  char *string;
-  char buffer[10];
-  
-  string = malloc (1);
-  string[0] = '\0';
-  
-  stpcpy (buffer, string);
-}
-
-#if 0
-/*
-Gives warnings like:
-==10941== Use of uninitialised CPU condition code
-==10941==    at 0x4034B9DA: __stpcpy (__stpcpy:36)
-==10941==    by 0x402DF627: __libc_start_main (../sysdeps/generic/libc-start.c:129)
-==10941==    by 0x80483D1: __libc_start_main@@GLIBC_2.0 (in /home/alex/other_src/valgrind-20020320/a.out)
-==10941==    by <bogus frame pointer> ???
-
-Here is the supression i use:
-{
-   __stpcpy(Value0)
-   Value0
-   fun:__stpcpy
-   fun:*
-}
-*/
-#endif
diff --git a/tests/trivialleak.c b/tests/trivialleak.c
deleted file mode 100644
index 002a48a..0000000
--- a/tests/trivialleak.c
+++ /dev/null
@@ -1,12 +0,0 @@
-static void test()
-  {
-    void* leak;
-    int i;
-    for (i = 0; i < 1000; i++)
-       leak = (void*)malloc( 100 );
-  }
-  int main()
-  {
-    test();
-    return 0;
-  }
diff --git a/tests/tronical.c b/tests/tronical.c
deleted file mode 100644
index dabd83d..0000000
--- a/tests/tronical.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <stdio.h>
-
-struct Foo
-{
-    int a1 : 1;
-    int a2 : 1;
-    int a3 : 1;
-    int a4 : 1;
-    int a5 : 1;
-    int a6 : 1;
-    int a7 : 1;
-    int bleh : 1;
-};
-
-struct Foo* foo;
-
-void set()
-{
-    foo->bleh = 1;
-}
-
-void get()
-{
-    if ( foo->bleh == 0 )
-	printf( "blieb\n" );
-}
-
-int main()
-{
-  foo = malloc(sizeof(struct Foo));
-    set();
-
-    get();
-
-    return 0;
-}
-
diff --git a/tests/tronical.s b/tests/tronical.s
deleted file mode 100644
index ee17c09..0000000
--- a/tests/tronical.s
+++ /dev/null
@@ -1,58 +0,0 @@
-	.file	"tronical.c"
-	.version	"01.01"
-gcc2_compiled.:
-.text
-	.align 4
-.globl set
-	.type	 set,@function
-set:
-	pushl	%ebp
-	movl	foo, %eax
-	orb	$128, (%eax)
-	movl	%esp, %ebp
-	popl	%ebp
-	ret
-.Lfe1:
-	.size	 set,.Lfe1-set
-	.section	.rodata.str1.1,"ams",@progbits,1
-.LC0:
-	.string	"blieb\n"
-.text
-	.align 4
-.globl get
-	.type	 get,@function
-get:
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$8, %esp
-	movl	foo, %eax
-	cmpb	$0, (%eax)
-	js	.L4
-	subl	$12, %esp
-	pushl	$.LC0
-	call	printf
-	addl	$16, %esp
-.L4:
-	leave
-	ret
-.Lfe2:
-	.size	 get,.Lfe2-get
-	.align 4
-.globl main
-	.type	 main,@function
-main:
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$20, %esp
-	pushl	$4
-	call	malloc
-	movl	%eax, foo
-	call	set
-	call	get
-	xorl	%eax, %eax
-	leave
-	ret
-.Lfe3:
-	.size	 main,.Lfe3-main
-	.comm	foo,4,4
-	.ident	"GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-98)"
diff --git a/tests/oneparam.c b/tests/unused/oneparam.c
similarity index 100%
rename from tests/oneparam.c
rename to tests/unused/oneparam.c
diff --git a/tests/pth_cancel1.c b/tests/unused/pth_cancel1.c
similarity index 100%
rename from tests/pth_cancel1.c
rename to tests/unused/pth_cancel1.c
diff --git a/tests/pth_pause.c b/tests/unused/pth_pause.c
similarity index 100%
rename from tests/pth_pause.c
rename to tests/unused/pth_pause.c
diff --git a/tests/pth_semaphore1.c b/tests/unused/pth_semaphore1.c
similarity index 100%
rename from tests/pth_semaphore1.c
rename to tests/unused/pth_semaphore1.c
diff --git a/tests/pth_signal1.c b/tests/unused/pth_signal1.c
similarity index 100%
rename from tests/pth_signal1.c
rename to tests/unused/pth_signal1.c
diff --git a/tests/pth_signal2.c b/tests/unused/pth_signal2.c
similarity index 100%
rename from tests/pth_signal2.c
rename to tests/unused/pth_signal2.c
diff --git a/tests/pth_signal_gober.c b/tests/unused/pth_signal_gober.c
similarity index 100%
rename from tests/pth_signal_gober.c
rename to tests/unused/pth_signal_gober.c
diff --git a/tests/pth_sigpending.c b/tests/unused/pth_sigpending.c
similarity index 100%
rename from tests/pth_sigpending.c
rename to tests/unused/pth_sigpending.c
diff --git a/tests/pth_simple_mutex.c b/tests/unused/pth_simple_mutex.c
similarity index 100%
rename from tests/pth_simple_mutex.c
rename to tests/unused/pth_simple_mutex.c
diff --git a/tests/pth_simple_threads.c b/tests/unused/pth_simple_threads.c
similarity index 100%
rename from tests/pth_simple_threads.c
rename to tests/unused/pth_simple_threads.c
diff --git a/tests/pth_threadpool.c b/tests/unused/pth_threadpool.c
similarity index 100%
rename from tests/pth_threadpool.c
rename to tests/unused/pth_threadpool.c
diff --git a/tests/pth_yield.c b/tests/unused/pth_yield.c
similarity index 100%
rename from tests/pth_yield.c
rename to tests/unused/pth_yield.c
diff --git a/tests/signal1.c b/tests/unused/signal1.c
similarity index 100%
rename from tests/signal1.c
rename to tests/unused/signal1.c
diff --git a/tests/signal3.c b/tests/unused/signal3.c
similarity index 100%
rename from tests/signal3.c
rename to tests/unused/signal3.c
diff --git a/tests/sigwait_all.c b/tests/unused/sigwait_all.c
similarity index 100%
rename from tests/sigwait_all.c
rename to tests/unused/sigwait_all.c
diff --git a/tests/twoparams.c b/tests/unused/twoparams.c
similarity index 100%
rename from tests/twoparams.c
rename to tests/unused/twoparams.c
diff --git a/tests/twoparams.s b/tests/unused/twoparams.s
similarity index 100%
rename from tests/twoparams.s
rename to tests/unused/twoparams.s
diff --git a/tests/vg_regtest b/tests/vg_regtest
new file mode 100755
index 0000000..d76a1b6
--- /dev/null
+++ b/tests/vg_regtest
@@ -0,0 +1,311 @@
+#! /usr/bin/perl -w
+#
+# Valgrind regression testing script.
+#
+# Each test is defined in a file <test>.vgtest, containing one or more of the
+# following lines:
+#   - prog:   <prog to run>                         (compulsory)
+#   - args:   <args for prog>                       (default: none)
+#   - vgopts: <Valgrind options>                    (default: none)
+#   - stdout_filter: <filter to run stdout through> (default: none)
+#   - stderr_filter: <filter to run stderr through> (default: filter_stderr)
+#
+# Also have "vgopts.hd:" for options to be only passed if --head true, and
+# corresponding "vgopts.er" for --eraser.
+#
+# Expected results (filtered) are kept in <test>.stderr.exp and
+# <test>.stdout.exp.  The latter can be missing if it would be empty.
+#
+# If results don't match, the output can be found in <test>.std<strm>.out,
+# and the diff between expected and actual in <test>.std<strm>.diff.
+#
+# usage: vg_regtest [options] <dirs | files>
+#
+# You can specify individual files to test, or whole directories, or both.
+#
+# Options:
+#   --head:     use 1.0.X expected stderr results
+#   --eraser:   use ERASER expected stderr results    (default)
+#   --all:      run tests in all subdirs
+#   --valgrind: valgrind to use.  Default is one in this build tree.
+#
+# The difference between the 1.0.X and ERASER results is that ERASER gives
+# shorter stack traces.  The ERASER stderr results are kept in 
+# <test>.stderr.er.
+#----------------------------------------------------------------------------
+# Adding a new tests subdirectory:
+# - Add directory to valgrind/configure.in
+# - Write a Makefile.am for it
+# - Write a filter_stderr for it;  it should always call
+#   ../filter_stderr_basic as its first step
+# - Add test programs, .vgtest, .stderr.exp{,.hd}, .stdout.exp files
+#
+# Note that if you add new basis filters in tests/, if they call other basic
+# filters, use the $dir trick to get the directory right as in filter_discards.
+#----------------------------------------------------------------------------
+
+use strict;
+
+#----------------------------------------------------------------------------
+# Global vars
+#----------------------------------------------------------------------------
+my $usage="vg_regtest [--head|--eraser, --all]\n";
+
+my $tmp="vg_regtest.tmp.$$";
+
+# Test variables
+my $vgopts;             # valgrind options
+my $prog;               # test prog
+my $args;               # test prog args
+my $stdout_filter;      # filter program to run stdout results file through
+my $stderr_filter;      # filter program to run stderr results file through
+
+my @failures;           # List of failed tests
+
+my $exp = "";           # --eraser is default
+
+# Assumes we're in tests/
+my $valgrind = "../bin/valgrind";
+
+chomp(my $tests_dir = `pwd`);
+
+# default filter is the one named "filter_stderr" in the test's directory
+my $default_stderr_filter = "filter_stderr";
+
+
+#----------------------------------------------------------------------------
+# Process command line, setup
+#----------------------------------------------------------------------------
+
+# If $prog is a relative path, it prepends $dir to it.  Useful for two reasons:
+#
+# 1. Can prepend "." onto programs to avoid trouble with users who don't have
+#    "." in their path (by making $dir = ".")
+# 2. Can prepend the current dir to make the command absolute to avoid
+#    subsequent trouble when we change directories.
+#
+# Also checks the program exists and is executable.
+sub validate_program ($$) 
+{
+    my ($dir, $prog) = @_;
+
+    # If absolute path, leave it alone.  If relative, make it
+    # absolute -- by prepending current dir -- so we can change
+    # dirs and still use it.
+    $prog = "$dir/$prog" if ($prog !~ /^\//);
+    (-f $prog) or die "`$prog' not found or not a file ($dir)\n";
+    (-x $prog) or die "`$prog' not found or not executable ($dir)\n";
+
+    return $prog;
+}
+
+sub process_command_line() 
+{
+    my $alldirs = 0;
+    my @fs;
+    
+    for my $arg (@ARGV) {
+        if ($arg =~ /^-/) {
+            if      ($arg =~ /^--head$/) {
+                $exp = ".hd";
+            } elsif ($arg =~ /^--eraser$/) {
+                $exp = "";
+            } elsif ($arg =~ /^--all$/) {
+                $alldirs = 1;
+            } elsif ($arg =~ /^--valgrind=(.*)$/) {
+                $valgrind = $1;
+            } else {
+                die $usage;
+            }
+        } else {
+            push(@fs, $arg);
+        }
+    }
+    $valgrind = validate_program($tests_dir, $valgrind);
+
+    if ($alldirs) {
+        @fs = ();
+        foreach my $f (glob "*") {
+            push(@fs, $f) if (-d $f);
+        }
+    }
+
+    (0 != @fs) or die "No test files or directories specified\n";
+
+    return @fs;
+}
+
+#----------------------------------------------------------------------------
+# Read a .vgtest file
+#----------------------------------------------------------------------------
+sub read_vgtest_file($)
+{
+    my ($f) = @_;
+
+    # Defaults.
+    ($vgopts, $prog, $args, $stdout_filter, $stderr_filter) = 
+        ("", undef, "", undef, undef);
+
+    # Every test directory must have a "filter_stderr"
+    $stderr_filter = validate_program(".", $default_stderr_filter);
+
+    open(INPUTFILE, "< $f") || die "File $f not openable\n";
+
+    while (my $line = <INPUTFILE>) {
+        if      ($line =~ /^\s*vgopts:\s*(.*)$/) {
+            $vgopts = $1;
+        } elsif ($line =~ /^\s*prog:\s*(.*)$/) {
+            $prog = validate_program(".", $1);
+        } elsif ($line =~ /^\s*args:\s*(.*)$/) {
+            $args = $1;
+        } elsif ($line =~ /^\s*vgopts\.hd:\s*(.*)$/) {
+            $vgopts = $1 if ($exp eq ".hd");
+        } elsif ($line =~ /^\s*vgopts\.er:\s*(.*)$/) {
+            $vgopts = $1 if ($exp eq "");
+        } elsif ($line =~ /^\s*stdout_filter:\s*(.*)$/) {
+            $stdout_filter = validate_program(".", $1);
+        } elsif ($line =~ /^\s*stderr_filter:\s*(.*)$/) {
+            $stderr_filter = validate_program(".", $1);
+        } else {
+            die "Bad line in $f: $line\n";
+        }
+    }
+    close(INPUTFILE);
+
+    if (!defined $prog) {
+        die "no `prog:' line in `$f'\n";
+    }
+}
+
+#----------------------------------------------------------------------------
+# Do one test
+#----------------------------------------------------------------------------
+# Since most of the program time is spent in system() calls, need this to
+# propagate a Ctrl-C enabling us to quit.
+sub mysystem($) 
+{
+    (system($_[0]) != 2) or exit 1;      # 2 is SIGINT
+}
+
+sub do_one_test($$) 
+{
+    my ($dir, $vgtest) = @_;
+    $vgtest =~ /^(.*)\.vgtest/;
+    my $name = $1;
+    my $fullname = "$dir/$name"; 
+
+    read_vgtest_file($vgtest);
+
+    printf("%-30s valgrind $vgopts $prog $args\n", "$fullname:");
+
+    # If --eraser, pass the apt. --skin option for the directory (can be
+    # overridden by an "args:" or "args.er:" line, though)
+    if ($exp eq ".hd") {
+        mysystem("$valgrind $vgopts $prog $args > $name.stdout.out 2> $name.stderr.out");
+    } else {
+        mysystem("$valgrind --skin=$dir $vgopts $prog $args > $name.stdout.out 2> $name.stderr.out");
+    }
+
+    if (defined $stdout_filter) {
+        mysystem("$stdout_filter < $name.stdout.out > $tmp");
+        rename($tmp, "$name.stdout.out");
+    }
+
+    mysystem("$stderr_filter < $name.stderr.out > $tmp");
+    rename($tmp, "$name.stderr.out");
+
+    # If stdout expected empty, .exp file might be missing so diff with 
+    # /dev/null
+    my $stdout_exp = ( -r "$name.stdout.exp" 
+                     ? "$name.stdout.exp" 
+                     : "/dev/null" );
+
+    # If 1.0.X/HEAD and ERASER versions have the same expected stderr output,
+    # foo.stderr.exp.hd might be missing, so use foo.stderr.exp instead if
+    # --head is true.
+    my $stderr_exp = "$name.stderr.exp$exp";
+    if ($exp eq ".hd" && not -r $stderr_exp) {
+       $stderr_exp = "$name.stderr.exp";
+    }
+    (-r $stderr_exp) or die "Could not read `$stderr_exp'\n";
+
+    mysystem("diff -C0 $stdout_exp $name.stdout.out > $name.stdout.diff");
+    mysystem("diff -C0 $stderr_exp $name.stderr.out > $name.stderr.diff");
+
+    for my $ext ("stdout", "stderr") {
+        if (-s "$name.$ext.diff") {
+            print "*** $fullname failed ($ext) ***\n";
+            push(@failures, sprintf("%-30s $ext", "$fullname"));
+        } else {
+            unlink("$name.$ext.out", "$name.$ext.diff");
+        }
+    }
+}
+
+#----------------------------------------------------------------------------
+# Test one directory
+#----------------------------------------------------------------------------
+sub test_one_dir($) 
+{
+    my ($dir) = @_;
+    $dir =~ s/\/$//;    # trim a trailing '/'
+
+    print "-- Running tests in $dir ----------------------------------\n";
+    chdir($dir) or die "Could not change into $dir\n";
+
+    my @vgtests = glob "*\.vgtest";
+    
+    foreach my $vgtest (@vgtests) {
+        do_one_test($dir, $vgtest);
+    }
+    chdir("..");
+    print "\n";
+}
+
+#----------------------------------------------------------------------------
+# Summarise results
+#----------------------------------------------------------------------------
+sub summarise_results 
+{
+    print "-- Failed tests -------------------------------\n";
+    if (0 == @failures) {
+        print "   (none)\n";
+    } else {
+        foreach my $failure (@failures) {
+            print "$failure\n";
+        }
+    }
+}
+
+#----------------------------------------------------------------------------
+# main(), sort of
+#----------------------------------------------------------------------------
+
+# undefine $VALGRIND_OPTS
+if ( exists $ENV{VALGRIND_OPTS} ) {
+  undef $ENV{VALGRIND_OPTS};
+}
+
+my @fs = process_command_line();
+foreach my $f (@fs) {
+    if (-d $f) {
+        test_one_dir($f);
+    } else { 
+        # Allow the .vgtest suffix to be given or omitted
+        if ($f =~ /.vgtest$/ && -r $f) {
+            # do nothing
+        } elsif (-r "$f.vgtest") {
+            $f = "$f.vgtest";
+        } else {
+            die "`$f' neither a directory nor a readable test file/name\n"
+        }
+        my $dir  = `dirname  $f`;   chomp $dir;
+        my $file = `basename $f`;   chomp $file;
+        chdir($dir) or die "Could not change into $dir\n";
+        do_one_test($dir, $file);
+        chdir($tests_dir);
+    }
+}
+summarise_results();
+
+
diff --git a/tests/weirdioctl.c b/tests/weirdioctl.c
deleted file mode 100644
index a666795..0000000
--- a/tests/weirdioctl.c
+++ /dev/null
@@ -1,43 +0,0 @@
-
-/* A program which sets a readable fd to have a timeout, and therefore
-   needs --weird-hacks=ioctl-VTIME in order to run without
-   blocking. */
-
-#include <stdio.h>
-#include <sys/ioctl.h>
-#include <termio.h>
-
-int main ( void )
-{
-   int c;
-   int res;
-         struct termio tty, oldtty;
-
-          /**
-           ** Save the old tty settings, and get rid of echo
-           ** for the new tty settings
-           **/
-          ioctl(0, TCGETA, &oldtty);
-          tty = oldtty;
-          tty.c_lflag    &= ~(ICANON|ECHO|ECHOE|ECHOK|ECHONL);
-          tty.c_cc[VMIN]  = 0;
-          tty.c_cc[VTIME] = 5;
-          res = ioctl(0, TCSETA, &tty);
-	  printf("first ioctl returned %d\n", res);
-
-          /**
-           ** Now do whatever stuff you want non-echoed
-           **/
-	  while (1) {
-	    c = getchar();
-	    printf("got %d\n", c);
-	  }
-
-          /**
-           ** Now reset the old settings
-           **/
-          res = ioctl(0, TCSETA, &oldtty);
-	  printf("second ioctl returned %d\n", res);
-
-return 0;
-}
diff --git a/valgrind.h b/valgrind.h
index 5a819c7..fe880db 100644
--- a/valgrind.h
+++ b/valgrind.h
@@ -1,27 +1,58 @@
 
 /*
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (valgrind.h) only.  The entire rest of Valgrind is licensed
+   under the terms of the GNU General Public License, version 2.  See
+   the COPYING file in the source distribution for details.
+
+   ----------------------------------------------------------------
+
    This file is part of Valgrind, an x86 protected-mode emulator 
    designed for debugging and profiling binaries on x86-Unixes.
 
-   Copyright (C) 2000-2002 Julian Seward 
-      jseward@acm.org
+   Copyright (C) 2000-2002 Julian Seward.  All rights reserved.
 
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
 
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
+   2. The origin of this software must not be misrepresented; you must 
+      not claim that you wrote the original software.  If you use this 
+      software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
 
-   The GNU General Public License is contained in the file LICENSE.
+   3. Altered source versions must be plainly marked as such, and must
+      not be misrepresented as being the original software.
+
+   4. The name of the author may not be used to endorse or promote 
+      products derived from this software without specific prior written 
+      permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   ----------------------------------------------------------------
+
+   Notice that the above BSD-style license applies to this one file
+   (valgrind.h) only.  The entire rest of Valgrind is licensed under
+   the terms of the GNU General Public License, version 2.  See the
+   COPYING file in the source distribution for details.
+
+   ---------------------------------------------------------------- 
 */
 
 
@@ -31,8 +62,8 @@
 
 /* This file is for inclusion into client (your!) code.
 
-   You can use these macros to manipulate and query memory permissions
-   inside your own programs.
+   You can use these macros to manipulate and query Valgrind's 
+   execution inside your own programs.
 
    The resulting executables will still run without Valgrind, just a
    little bit more slowly than they otherwise would, but otherwise
@@ -84,121 +115,14 @@
 
 /* Some request codes.  There are many more of these, but most are not
    exposed to end-user view.  These are the public ones, all of the
-   form 0x1000 + small_number. 
+   form 0x1000 + small_number.
 */
 
-#define VG_USERREQ__MAKE_NOACCESS        0x1001
-#define VG_USERREQ__MAKE_WRITABLE        0x1002
-#define VG_USERREQ__MAKE_READABLE        0x1003
-#define VG_USERREQ__DISCARD              0x1004
-#define VG_USERREQ__CHECK_WRITABLE       0x1005
-#define VG_USERREQ__CHECK_READABLE       0x1006
-#define VG_USERREQ__MAKE_NOACCESS_STACK  0x1007
-#define VG_USERREQ__RUNNING_ON_VALGRIND  0x1008
-#define VG_USERREQ__DO_LEAK_CHECK        0x1009 /* untested */
-#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A
-
-
-/* Client-code macros to manipulate the state of memory. */
-
-/* Mark memory at _qzz_addr as unaddressible and undefined for
-   _qzz_len bytes.  Returns an int handle pertaining to the block
-   descriptions Valgrind will use in subsequent error messages. */
-#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_NOACCESS,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Similarly, mark memory at _qzz_addr as addressible but undefined
-   for _qzz_len bytes. */
-#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_WRITABLE,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Similarly, mark memory at _qzz_addr as addressible and defined
-   for _qzz_len bytes. */
-#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len)               \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__MAKE_READABLE,           \
-                            _qzz_addr, _qzz_len, 0, 0);          \
-    _qzz_res;                                                    \
-   })
-
-/* Discard a block-description-handle obtained from the above three
-   macros.  After this, Valgrind will no longer be able to relate
-   addressing errors to the user-defined block associated with the
-   handle.  The permissions settings associated with the handle remain
-   in place.  Returns 1 for an invalid handle, 0 for a valid
-   handle. */
-#define VALGRIND_DISCARD(_qzz_blkindex)                          \
-   ({unsigned int _qzz_res;                                      \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
-                            VG_USERREQ__DISCARD,                 \
-                            0, _qzz_blkindex, 0, 0);             \
-    _qzz_res;                                                    \
-   })
-
-
-
-/* Client-code macros to check the state of memory. */
-
-/* Check that memory at _qzz_addr is addressible for _qzz_len bytes.
-   If suitable addressibility is not established, Valgrind prints an
-   error message and returns the address of the first offending byte.
-   Otherwise it returns zero. */
-#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len)                \
-   ({unsigned int _qzz_res;                                        \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__CHECK_WRITABLE,            \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-    _qzz_res;                                                      \
-   })
-
-/* Check that memory at _qzz_addr is addressible and defined for
-   _qzz_len bytes.  If suitable addressibility and definedness are not
-   established, Valgrind prints an error message and returns the
-   address of the first offending byte.  Otherwise it returns zero. */
-#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len)                \
-   ({unsigned int _qzz_res;                                        \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__CHECK_READABLE,            \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-    _qzz_res;                                                      \
-   })
-
-
-/* Use this macro to force the definedness and addressibility of a
-   value to be checked.  If suitable addressibility and definedness
-   are not established, Valgrind prints an error message and returns
-   the address of the first offending byte.  Otherwise it returns
-   zero. */
-#define VALGRIND_CHECK_DEFINED(__lvalue)                           \
-   (void)                                                          \
-   VALGRIND_CHECK_READABLE(                                        \
-      (volatile unsigned char *)&(__lvalue),                       \
-                      (unsigned int)(sizeof (__lvalue)))
-
-
-
-/* Mark memory, intended to be on the client's stack, at _qzz_addr as
-   unaddressible and undefined for _qzz_len bytes.  Does not return a
-   value.  The record associated with this setting will be
-   automatically removed by Valgrind when the containing routine
-   exits. */
-#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len)           \
-   {unsigned int _qzz_res;                                         \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__MAKE_NOACCESS_STACK,       \
-                            _qzz_addr, _qzz_len, 0, 0);            \
-   }
+typedef
+   enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001,
+          VG_USERREQ__DISCARD_TRANSLATIONS,
+          VG_USERREQ__FINAL_DUMMY_CLIENT_REQUEST,
+   } Vg_ClientRequest;
 
 
 /* Returns 1 if running on Valgrind, 0 if running on the real CPU. 
@@ -212,22 +136,6 @@
    })
 
 
-/* Mark memory, intended to be on the client's stack, at _qzz_addr as
-   unaddressible and undefined for _qzz_len bytes.  Does not return a
-   value.  The record associated with this setting will be
-   automatically removed by Valgrind when the containing routine
-   exits.  
-
-   Currently implemented but untested.
-*/
-#define VALGRIND_DO_LEAK_CHECK                                     \
-   {unsigned int _qzz_res;                                         \
-    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
-                            VG_USERREQ__DO_LEAK_CHECK,             \
-                            0, 0, 0, 0);                           \
-   }
-
-
 /* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
    _qzz_len - 1].  Useful if you are debugging a JITter or some such,
    since it provides a way to make sure valgrind will retranslate the
diff --git a/valgrind.in b/valgrind.in
index 7b99277..4fee909 100755
--- a/valgrind.in
+++ b/valgrind.in
@@ -1,11 +1,37 @@
 #!/bin/sh
+##--------------------------------------------------------------------##
+##--- The startup script.                                 valgrind ---##
+##--------------------------------------------------------------------##
+
+#  This file is part of Valgrind, an x86 protected-mode emulator 
+#  designed for debugging and profiling binaries on x86-Unixes.
+#
+#  Copyright (C) 2002 Julian Seward
+#     jseward@acm.org
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+#  02111-1307, USA.
+#
+#  The GNU General Public License is contained in the file COPYING.
+
 
 # Should point to the installation directory
 prefix="@prefix@"
 exec_prefix="@exec_prefix@"
 VALGRIND="@libdir@/valgrind"
 
-
 # Other stuff ...
 version="@VERSION@"
 emailto="jseward@acm.org"
@@ -13,168 +39,57 @@
 # The default name of the suppressions file
 vgsupp="--suppressions=$VALGRIND/default.supp"
 
-# name we were invoked with
-vgname=`echo $0 | sed 's,^.*/,,'`
-
 # Valgrind options
 vgopts=
 
-# Prog and arg to run
-argopts=
+# --skin=<foo> arg, specifying skin used
+skin_arg=
 
-# Show usage info?
-dousage=0
-
-# show version info?
-doversion=0
-
-# Collect up args for Valgrind
+# Collect up args for Valgrind.  Only --version and --skin are intercepted 
+# here;  the rest are passed to vg_main.c.
 while [ $# != 0 ]
 do
   arg=$1
   case "$arg" in
-#   options for the user
-    --help)                 dousage=1; break;;
-    --version)              doversion=1; break;;
-    --logfile-fd=*)         vgopts="$vgopts $arg"; shift;;
-    -v)                     vgopts="$vgopts $arg"; shift;;
-    --verbose)              vgopts="$vgopts -v"; shift;;
-    -q)                     vgopts="$vgopts $arg"; shift;;
-    --quiet)                vgopts="$vgopts $arg"; shift;;
-    --error-limit=no)       vgopts="$vgopts $arg"; shift;;
-    --error-limit=yes)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=no)      vgopts="$vgopts $arg"; shift;;
-    --check-addrVs=yes)     vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=no)        vgopts="$vgopts $arg"; shift;;
-    --gdb-attach=yes)       vgopts="$vgopts $arg"; shift;;
-    --demangle=no)          vgopts="$vgopts $arg"; shift;;
-    --demangle=yes)         vgopts="$vgopts $arg"; shift;;
-    --num-callers=*)        vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=no)  vgopts="$vgopts $arg"; shift;;
-    --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;;
-    --leak-check=no)        vgopts="$vgopts $arg"; shift;;
-    --leak-check=yes)       vgopts="$vgopts $arg"; shift;;
-    --show-reachable=no)    vgopts="$vgopts $arg"; shift;;
-    --show-reachable=yes)   vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=low)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=med)  vgopts="$vgopts $arg"; shift;;
-    --leak-resolution=high) vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=no)     vgopts="$vgopts $arg"; shift;;
-    --sloppy-malloc=yes)    vgopts="$vgopts $arg"; shift;;
-    --alignment=*)          vgopts="$vgopts $arg"; shift;;
-    --trace-children=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-children=yes)   vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=no)    vgopts="$vgopts $arg"; shift;;
-    --workaround-gcc296-bugs=yes)   vgopts="$vgopts $arg"; shift;;
-    --freelist-vol=*)       vgopts="$vgopts $arg"; shift;;
-    --suppressions=*)       vgopts="$vgopts $arg"; shift;;
-    --cachesim=yes)         vgopts="$vgopts $arg"; shift;;
-    --cachesim=no)          vgopts="$vgopts $arg"; shift;;
-    --I1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --D1=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --L2=*,*,*)             vgopts="$vgopts $arg"; shift;;
-    --weird-hacks=*)        vgopts="$vgopts $arg"; shift;;
-#   options for debugging Valgrind
-    --sanity-level=*)       vgopts="$vgopts $arg"; shift;;
-    --single-step=yes)      vgopts="$vgopts $arg"; shift;;
-    --single-step=no)       vgopts="$vgopts $arg"; shift;;
-    --optimise=yes)         vgopts="$vgopts $arg"; shift;;
-    --optimise=no)          vgopts="$vgopts $arg"; shift;;
-    --instrument=yes)       vgopts="$vgopts $arg"; shift;;
-    --instrument=no)        vgopts="$vgopts $arg"; shift;;
-    --cleanup=yes)          vgopts="$vgopts $arg"; shift;;
-    --cleanup=no)           vgopts="$vgopts $arg"; shift;;
-    --smc-check=none)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=some)       vgopts="$vgopts $arg"; shift;;
-    --smc-check=all)        vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=yes)   vgopts="$vgopts $arg"; shift;;
-    --trace-syscalls=no)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=yes)    vgopts="$vgopts $arg"; shift;;
-    --trace-signals=no)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-symtab=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=yes)     vgopts="$vgopts $arg"; shift;;
-    --trace-malloc=no)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=yes)      vgopts="$vgopts $arg"; shift;;
-    --trace-sched=no)       vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=none)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=some)   vgopts="$vgopts $arg"; shift;;
-    --trace-pthread=all)    vgopts="$vgopts $arg"; shift;;
-    --stop-after=*)         vgopts="$vgopts $arg"; shift;;
-    --dump-error=*)         vgopts="$vgopts $arg"; shift;;
-    -*)                     dousage=1; break;;
+    --version)              echo "valgrind-$version"; exit 1 ;;
+    --skin=*)               skin_arg=$arg;            shift;;
+    -*)                     vgopts="$vgopts $arg";    shift;;
     *)                      break;;
   esac
 done
 
-if [ z"$doversion" = z1 ]; then
-   echo "valgrind-$version"
+
+# Decide on the skin.  Default to memory checking if not specified.
+if [ z"$skin_arg" = z ]; then
+   skin=memcheck
+else
+   # Hack off the "--skin=" prefix.
+   skin=`echo $skin_arg | sed 's/--skin=//'`
+fi
+
+# Setup skin shared object.
+skin_so="vgskin_${skin}.so"
+if [ ! -r $VALGRIND/$skin_so ] ; then
+   echo
+   echo "Extension error:"
+   echo "  The shared library \`$skin_so' for the chosen"
+   echo "  skin \`$skin' could not be found in"
+   echo "  $VALGRIND"
+   echo
    exit 1
 fi
 
-if [ $# = 0 ] || [ z"$dousage" = z1 ]; then
-   echo
-   echo "usage: $vgname [options] prog-and-args"
-   echo
-   echo "  options for the user, with defaults in [ ], are:"
-   echo "    --help                    show this message"
-   echo "    --version                 show version"
-   echo "    -q --quiet                run silently; only print error msgs"
-   echo "    -v --verbose              be more verbose, incl counts of errors"
-   echo "    --gdb-attach=no|yes       start GDB when errors detected? [no]"
-   echo "    --demangle=no|yes         automatically demangle C++ names? [yes]"
-   echo "    --num-callers=<number>    show <num> callers in stack traces [4]"
-   echo "    --error-limit=no|yes      stop showing new errors if too many? [yes]"
-   echo "    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]"
-   echo "    --leak-check=no|yes       search for memory leaks at exit? [no]"
-   echo "    --leak-resolution=low|med|high"
-   echo "                              amount of bt merging in leak check [low]"
-   echo "    --show-reachable=no|yes   show reachable blocks in leak check? [no]"
-   echo "    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]"
-   echo "    --alignment=<number>      set minimum alignment of allocations [4]"
-   echo "    --trace-children=no|yes   Valgrind-ise child processes? [no]"
-   echo "    --logfile-fd=<number>     file descriptor for messages [2=stderr]"
-   echo "    --freelist-vol=<number>   volume of freed blocks queue [1000000]"
-   echo "    --workaround-gcc296-bugs=no|yes  self explanatory [no]"
-   echo "    --suppressions=<filename> suppress errors described in"
-   echo "                              suppressions file <filename>"
-   echo "    --check-addrVs=no|yes     experimental lighterweight checking? [yes]"
-   echo "                              yes == Valgrind's original behaviour"
-   echo "    --cachesim=no|yes         do cache profiling? [no]"
-   echo "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually"
-   echo "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually"
-   echo "    --L2=<size>,<assoc>,<line_size>  set L2 cache manually"
-   echo "    --weird-hacks=hack1,hack2,...  [no hacks selected]"
-   echo "         recognised hacks are: ioctl-VTIME truncate-writes"
-   echo ""
-   echo
-   echo "  options for debugging Valgrind itself are:"
-   echo "    --sanity-level=<number>   level of sanity checking to do [1]"
-   echo "    --single-step=no|yes      translate each instr separately? [no]"
-   echo "    --optimise=no|yes         improve intermediate code? [yes]"
-   echo "    --instrument=no|yes       actually do memory checks? [yes]"
-   echo "    --cleanup=no|yes          improve after instrumentation? [yes]"
-   echo "    --smc-check=none|some|all check writes for s-m-c? [some]"
-   echo "    --trace-syscalls=no|yes   show all system calls? [no]"
-   echo "    --trace-signals=no|yes    show signal handling details? [no]"
-   echo "    --trace-symtab=no|yes     show symbol table details? [no]"
-   echo "    --trace-malloc=no|yes     show client malloc details? [no]"
-   echo "    --trace-sched=no|yes      show thread scheduler details? [no]"
-   echo "    --trace-pthread=none|some|all  show pthread event details? [no]"
-   echo "    --stop-after=<number>     switch to real CPU after executing"
-   echo "                              <number> basic blocks [infinity]"
-   echo "    --dump-error=<number>     show translation for basic block"
-   echo "                              associated with <number>'th"
-   echo "                              error context [0=don't show any]"
-   echo
-   echo "  Extra options are read from env variable \$VALGRIND_OPTS"
-   echo
-   echo "  Valgrind is Copyright (C) 2000-2002 Julian Seward"
-   echo "  and licensed under the GNU General Public License, version 2."
-   echo "  Bug reports, feedback, admiration, abuse, etc, to: $emailto."
-   echo
-   exit 1
+VG_CMD="$@"
+VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
+
+# If no command given, act like -h was given so vg_main.c prints out
+# the usage string.  And set VG_CMD to be any program, doesn't matter which
+# because it won't be run anyway (we use 'true').
+if [ z"$VG_CMD" = z ] ; then
+   VG_ARGS="$VG_ARGS -h"
+   VG_CMD=true
 fi
+export VG_ARGS
 
 # A bit subtle.  The LD_PRELOAD added entry must be absolute
 # and not depend on LD_LIBRARY_PATH.  This is so that we can
@@ -182,13 +97,19 @@
 # libpthread.so fall out of visibility, independently of
 # whether valgrind.so is visible.
 
-VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts"
-export VG_ARGS
 LD_LIBRARY_PATH=$VALGRIND:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH
-LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD
+
+# Insert skin .so before valgrind.so to override template functions.
+LD_PRELOAD=$VALGRIND/$skin_so:$VALGRIND/valgrind.so:$LD_PRELOAD
 export LD_PRELOAD
 #LD_DEBUG=files
 #LD_DEBUG=symbols
 #export LD_DEBUG
-exec "$@"
+
+exec $VG_CMD
+
+##--------------------------------------------------------------------##
+##--- end                                                 valgrind ---##
+##--------------------------------------------------------------------##
+
diff --git a/vg_addrcheck.c b/vg_addrcheck.c
new file mode 100644
index 0000000..a8d9075
--- /dev/null
+++ b/vg_addrcheck.c
@@ -0,0 +1,2587 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The AddrCheck skin: like MemCheck, but only does address     ---*/
+/*--- checking.  No definedness checking.                          ---*/
+/*---                                               vg_addrcheck.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_addrcheck_include.h"
+//#include "vg_profile.c"
+
+/*------------------------------------------------------------*/
+/*--- Defns                                                ---*/
+/*------------------------------------------------------------*/
+
+/* These many bytes below %ESP are considered addressible if we're
+   doing the --workaround-gcc296-bugs hack. */
+#define VG_GCC296_BUG_STACK_SLOP 1024
+
+
+typedef 
+   enum { 
+      /* Bad syscall params */
+      ParamSupp,
+      /* Memory errors in core (pthread ops, signal handling) */
+      CoreMemSupp,
+      /* Invalid read/write attempt at given size */
+      Addr1Supp, Addr2Supp, Addr4Supp, Addr8Supp,
+      /* Invalid or mismatching free */
+      FreeSupp
+   } 
+   AddrCheckSuppKind;
+
+/* What kind of error it is. */
+typedef 
+   enum { CoreMemErr,
+          AddrErr, 
+          ParamErr, UserErr,  /* behaves like an anonymous ParamErr */
+          FreeErr, FreeMismatchErr
+   }
+   AddrCheckErrorKind;
+
+/* What kind of memory access is involved in the error? */
+typedef
+   enum { ReadAxs, WriteAxs, ExecAxs }
+   AxsKind;
+
+/* Extra context for memory errors */
+typedef
+   struct {
+      /* AddrErr */
+      AxsKind axskind;
+      /* AddrErr */
+      Int size;
+      /* AddrErr, FreeErr, FreeMismatchErr, ParamErr, UserErr */
+      AcAddrInfo addrinfo;
+      /* ParamErr, UserErr, CoreMemErr */
+      Bool isWrite;
+   }
+   AddrCheckError;
+
+/*------------------------------------------------------------*/
+/*--- Comparing and printing errors                        ---*/
+/*------------------------------------------------------------*/
+
+static __inline__
+void clear_AcAddrInfo ( AcAddrInfo* ai )
+{
+   ai->akind      = Unknown;
+   ai->blksize    = 0;
+   ai->rwoffset   = 0;
+   ai->lastchange = NULL;
+   ai->stack_tid  = VG_INVALID_THREADID;
+   ai->maybe_gcc  = False;
+}
+
+static __inline__
+void clear_AddrCheckError ( AddrCheckError* err_extra )
+{
+   err_extra->axskind   = ReadAxs;
+   err_extra->size      = 0;
+   clear_AcAddrInfo ( &err_extra->addrinfo );
+   err_extra->isWrite   = False;
+}
+
+__attribute__((unused))
+static Bool eq_AcAddrInfo ( VgRes res, AcAddrInfo* ai1, AcAddrInfo* ai2 )
+{
+   if (ai1->akind != Undescribed 
+       && ai2->akind != Undescribed
+       && ai1->akind != ai2->akind) 
+      return False;
+   if (ai1->akind == Freed || ai1->akind == Mallocd) {
+      if (ai1->blksize != ai2->blksize)
+         return False;
+      if (!VG_(eq_ExeContext)(res, ai1->lastchange, ai2->lastchange))
+         return False;
+   }
+   return True;
+}
+
+/* Compare error contexts, to detect duplicates.  Note that if they
+   are otherwise the same, the faulting addrs and associated rwoffsets
+   are allowed to be different.  */
+
+Bool SK_(eq_SkinError) ( VgRes res,
+                         SkinError* e1, SkinError* e2 )
+{
+   AddrCheckError* e1_extra = e1->extra;
+   AddrCheckError* e2_extra = e2->extra;
+   
+   switch (e1->ekind) {
+      case CoreMemErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)   return False;
+         if (e2->ekind != CoreMemErr)                  return False; 
+         if (e1->string == e2->string)                 return True;
+         if (0 == VG_(strcmp)(e1->string, e2->string)) return True;
+         return False;
+
+      case UserErr:
+      case ParamErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)
+            return False;
+         if (e1->ekind == ParamErr 
+             && 0 != VG_(strcmp)(e1->string, e2->string))
+            return False;
+         return True;
+
+      case FreeErr:
+      case FreeMismatchErr:
+         /* JRS 2002-Aug-26: comparing addrs seems overkill and can
+            cause excessive duplication of errors.  Not even AddrErr
+            below does that.  So don't compare either the .addr field
+            or the .addrinfo fields. */
+         /* if (e1->addr != e2->addr) return False; */
+         /* if (!eq_AcAddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+               return False;
+         */
+         return True;
+
+      case AddrErr:
+         /* if (e1_extra->axskind != e2_extra->axskind) return False; */
+         if (e1_extra->size != e2_extra->size) return False;
+         /*
+         if (!eq_AcAddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+            return False;
+         */
+         return True;
+
+      default: 
+         VG_(printf)("Error:\n  unknown AddrCheck error code %d\n", e1->ekind);
+         VG_(panic)("unknown error code in SK_(eq_SkinError)");
+   }
+}
+
+static void pp_AcAddrInfo ( Addr a, AcAddrInfo* ai )
+{
+   switch (ai->akind) {
+      case Stack: 
+         VG_(message)(Vg_UserMsg, 
+                      "   Address 0x%x is on thread %d's stack", 
+                      a, ai->stack_tid);
+         break;
+      case Unknown:
+         if (ai->maybe_gcc) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
+               a);
+            VG_(message)(Vg_UserMsg, 
+               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
+         }
+         break;
+      case Freed: case Mallocd: {
+         UInt delta;
+         UChar* relative;
+         if (ai->rwoffset < 0) {
+            delta    = (UInt)(- ai->rwoffset);
+            relative = "before";
+         } else if (ai->rwoffset >= ai->blksize) {
+            delta    = ai->rwoffset - ai->blksize;
+            relative = "after";
+         } else {
+            delta    = ai->rwoffset;
+            relative = "inside";
+         }
+         {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a block of size %d %s",
+               a, delta, relative, 
+               ai->blksize,
+               ai->akind==Mallocd ? "alloc'd" 
+                  : ai->akind==Freed ? "free'd" 
+                                     : "client-defined");
+         }
+         VG_(pp_ExeContext)(ai->lastchange);
+         break;
+      }
+      default:
+         VG_(panic)("pp_AcAddrInfo");
+   }
+}
+
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   AddrCheckError* err_extra = err->extra;
+
+   switch (err->ekind) {
+      case CoreMemErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         }
+         pp_ExeContext();
+         break;
+      
+      case AddrErr:
+         switch (err_extra->axskind) {
+            case ReadAxs:
+            case WriteAxs:
+               /* These two aren't actually differentiated ever. */
+               VG_(message)(Vg_UserMsg, "Invalid memory access of size %d", 
+                                        err_extra->size ); 
+               break;
+            case ExecAxs:
+               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
+                                        "stated on the next line");
+               break;
+            default: 
+               VG_(panic)("pp_SkinError(axskind)");
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case FreeErr:
+         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
+         /* fall through */
+      case FreeMismatchErr:
+         if (err->ekind == FreeMismatchErr)
+            VG_(message)(Vg_UserMsg, 
+                         "Mismatched free() / delete / delete []");
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case ParamErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Syscall param %s contains unaddressable byte(s)",
+                err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "Syscall param %s contains uninitialised or "
+                "unaddressable byte(s)",
+            err->string);
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case UserErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Unaddressable byte(s) found during client check request");
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "Uninitialised or "
+               "unaddressable byte(s) found during client check request");
+         }
+         pp_ExeContext();
+         pp_AcAddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      default: 
+         VG_(printf)("Error:\n  unknown AddrCheck error code %d\n", err->ekind);
+         VG_(panic)("unknown error code in SK_(pp_SkinError)");
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Recording errors                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Describe an address as best you can, for error messages,
+   putting the result in ai. */
+
+static void describe_addr ( Addr a, AcAddrInfo* ai )
+{
+   ShadowChunk* sc;
+   ThreadId     tid;
+
+   /* Nested functions, yeah.  Need the lexical scoping of 'a'. */ 
+
+   /* Closure for searching thread stacks */
+   Bool addr_is_in_bounds(Addr stack_min, Addr stack_max)
+   {
+      return (stack_min <= a && a <= stack_max);
+   }
+   /* Closure for searching malloc'd and free'd lists */
+   Bool addr_is_in_block(ShadowChunk *sh_ch)
+   {
+      return VG_(addr_is_in_block) ( a, sh_ch->data, sh_ch->size );
+   }
+   /* Perhaps it's on a thread's stack? */
+   tid = VG_(any_matching_thread_stack)(addr_is_in_bounds);
+   if (tid != VG_INVALID_THREADID) {
+      ai->akind     = Stack;
+      ai->stack_tid = tid;
+      return;
+   }
+   /* Search for a recently freed block which might bracket it. */
+   sc = SK_(any_matching_freed_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Freed;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   }
+   /* Search for a currently malloc'd block which might bracket it. */
+   sc = VG_(any_matching_mallocd_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Mallocd;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   } 
+   /* Clueless ... */
+   ai->akind = Unknown;
+   return;
+}
+
+
+/* Creates a copy of the err_extra, updates the copy with address info if
+   necessary, sticks the copy into the SkinError. */
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   AddrCheckError* err_extra;
+
+   err_extra  = VG_(malloc)(sizeof(AddrCheckError));
+   *err_extra = *((AddrCheckError*)err->extra);
+
+   if (err_extra->addrinfo.akind == Undescribed)
+      describe_addr ( err->addr, &(err_extra->addrinfo) );
+
+   err->extra = err_extra;
+}
+
+/* Is this address within some small distance below %ESP?  Used only
+   for the --workaround-gcc296-bugs kludge. */
+Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
+{
+   if ((UInt)esp > (UInt)aa
+       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
+      return True;
+   else
+      return False;
+}
+
+static
+void sk_record_address_error ( Addr a, Int size, Bool isWrite )
+{
+   AddrCheckError err_extra;
+   Bool           just_below_esp;
+
+   just_below_esp 
+      = VG_(is_just_below_ESP)( VG_(get_stack_pointer)(), a );
+
+   /* If this is caused by an access immediately below %ESP, and the
+      user asks nicely, we just ignore it. */
+   if (SK_(clo_workaround_gcc296_bugs) && just_below_esp)
+      return;
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.axskind = isWrite ? WriteAxs : ReadAxs;
+   err_extra.size    = size;
+   err_extra.addrinfo.akind     = Undescribed;
+   err_extra.addrinfo.maybe_gcc = just_below_esp;
+   VG_(maybe_record_error)( NULL, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+/* These ones are called from non-generated code */
+
+/* This is for memory errors in pthread functions, as opposed to pthread API
+   errors which are found by the core. */
+void SK_(record_core_mem_error) ( ThreadState* tst, Bool isWrite, Char* msg )
+{
+   AddrCheckError err_extra;
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, CoreMemErr, /*addr*/0, msg, &err_extra );
+}
+
+void SK_(record_param_error) ( ThreadState* tst, Addr a, Bool isWrite, 
+                               Char* msg )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, ParamErr, a, msg, &err_extra );
+}
+
+void SK_(record_jump_error) ( ThreadState* tst, Addr a )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.axskind = ExecAxs;
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_free_error) ( ThreadState* tst, Addr a ) 
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_freemismatch_error) ( ThreadState* tst, Addr a )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeMismatchErr, a, /*s*/NULL, &err_extra );
+}
+
+void SK_(record_user_error) ( ThreadState* tst, Addr a, Bool isWrite )
+{
+   AddrCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_AddrCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite        = isWrite;
+   VG_(maybe_record_error)( tst, UserErr, a, /*s*/NULL, &err_extra );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Suppressions                                         ---*/
+/*------------------------------------------------------------*/
+
+#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
+                      && VG_(strcmp)((s1),(s2))==0)
+
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if      (STREQ(name, "Param"))   *skind = ParamSupp;
+   else if (STREQ(name, "CoreMem")) *skind = CoreMemSupp;
+   else if (STREQ(name, "Addr1"))   *skind = Addr1Supp;
+   else if (STREQ(name, "Addr2"))   *skind = Addr2Supp;
+   else if (STREQ(name, "Addr4"))   *skind = Addr4Supp;
+   else if (STREQ(name, "Addr8"))   *skind = Addr8Supp;
+   else if (STREQ(name, "Free"))    *skind = FreeSupp;
+   else 
+      return False;
+
+   return True;
+}
+
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, 
+                                         SkinSupp *s )
+{
+   Bool eof;
+
+   if (s->skind == ParamSupp) {
+      eof = VG_(getLine) ( fd, buf, nBuf );
+      if (eof) return False;
+      s->string = VG_(strdup)(buf);
+   }
+   return True;
+}
+
+extern Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   UInt su_size;
+   AddrCheckError* err_extra = err->extra;
+
+   switch (su->skind) {
+      case ParamSupp:
+         return (err->ekind == ParamErr && STREQ(su->string, err->string));
+
+      case CoreMemSupp:
+         return (err->ekind == CoreMemErr && STREQ(su->string, err->string));
+
+      case Addr1Supp: su_size = 1; goto addr_case;
+      case Addr2Supp: su_size = 2; goto addr_case;
+      case Addr4Supp: su_size = 4; goto addr_case;
+      case Addr8Supp: su_size = 8; goto addr_case;
+      addr_case:
+         return (err->ekind == AddrErr && err_extra->size == su_size);
+
+      case FreeSupp:
+         return (err->ekind == FreeErr || err->ekind == FreeMismatchErr);
+
+      default:
+         VG_(printf)("Error:\n"
+                     "  unknown AddrCheck suppression type %d\n", su->skind);
+         VG_(panic)("unknown suppression type in "
+                    "SK_(error_matches_suppression)");
+   }
+}
+
+#  undef STREQ
+
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the AddrCheck skin: Maintain bitmaps of memory,      ---*/
+/*--- tracking the accessibility (A) of each byte.                 ---*/
+/*--------------------------------------------------------------------*/
+
+#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
+
+/*------------------------------------------------------------*/
+/*--- Command line options                                 ---*/
+/*------------------------------------------------------------*/
+
+Bool  SK_(clo_partial_loads_ok)       = True;
+Int   SK_(clo_freelist_vol)           = 1000000;
+Bool  SK_(clo_leak_check)             = False;
+VgRes SK_(clo_leak_resolution)        = Vg_LowRes;
+Bool  SK_(clo_show_reachable)         = False;
+Bool  SK_(clo_workaround_gcc296_bugs) = False;
+Bool  SK_(clo_cleanup)                = True;
+
+/*------------------------------------------------------------*/
+/*--- Profiling events                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Skin-specific profiling cost-centres, numbered after the core's
+   VgpFini; pushed/popped with VGP_PUSHCC/VGP_POPCC below. */
+typedef 
+   enum { 
+      VgpCheckMem = VgpFini+1,
+      VgpSetMem
+   } 
+   VgpSkinCC;
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory checking.               ---*/
+/*------------------------------------------------------------*/
+
+/* All reads and writes are checked against a memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like this:
+
+   The top 16 bits of an address are used to index into a top-level
+   map table, containing 65536 entries.  Each entry is a pointer to a
+   second-level map, which records the accessibility and validity
+   permissions for the 65536 bytes indexed by the lower 16 bits of the
+   address.  Each byte is represented by one bit, indicating
+   accessibility.  So each second-level map contains 8192 bytes.  This
+   two-level arrangement conveniently divides the 4G address space
+   into 64k lumps, each size 64k bytes.
+
+   All entries in the primary (top-level) map must point to a valid
+   secondary (second-level) map.  Since most of the 4G of address
+   space will not be in use -- ie, not mapped at all -- there is a
+   distinguished secondary map, which indicates `not addressible and
+   not valid' for all bytes.  Entries in the primary map for
+   which the entire 64k is not in use at all point at this
+   distinguished map.
+
+   [...] lots of stuff deleted due to out of date-ness
+
+   As a final optimisation, the alignment and address checks for
+   4-byte loads and stores are combined in a neat way.  The primary
+   map is extended to have 262144 entries (2^18), rather than 2^16.
+   The top 3/4 of these entries are permanently set to the
+   distinguished secondary map.  For a 4-byte load/store, the
+   top-level map is indexed not with (addr >> 16) but instead f(addr),
+   where
+
+    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
+        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
+        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
+
+   ie the lowest two bits are placed above the 16 high address bits.
+   If either of these two bits are nonzero, the address is misaligned;
+   this will select a secondary map from the upper 3/4 of the primary
+   map.  Because this is always the distinguished secondary map, a
+   (bogus) address check failure will result.  The failure handling
+   code can then figure out whether this is a genuine addr check
+   failure or whether it is a possibly-legitimate access at a
+   misaligned address.  */
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+/* One counter per event; the event numbering is documented in the
+   big comment following this section. */
+static UInt event_ctr[N_PROF_EVENTS];
+
+/* Zero all event counters. */
+static void init_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++)
+      event_ctr[i] = 0;
+}
+
+/* Print all non-zero event counters; called from SK_(fini). */
+static void done_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0) 
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+/* Bump the counter for event ev.  Bug fix: the do..while previously
+   ended in `;' inside the macro, expanding to a stray empty statement
+   at every use site and breaking unbraced if/else arms; the standard
+   do { } while(0)-style idiom requires the caller to supply the
+   semicolon. */
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False)
+
+#else
+
+/* Profiling disabled: hooks compile to nothing. */
+static void init_prof_mem ( void ) { }
+static void done_prof_mem ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   10   alloc_secondary_map
+
+   20   get_abit
+   21   get_vbyte
+   22   set_abit
+   23   set_vbyte
+   24   get_abits4_ALIGNED
+   25   get_vbytes4_ALIGNED
+
+   30   set_address_range_perms
+   31   set_address_range_perms(lower byte loop)
+   32   set_address_range_perms(quadword loop)
+   33   set_address_range_perms(upper byte loop)
+   
+   35   make_noaccess
+   36   make_writable
+   37   make_readable
+
+   40   copy_address_range_state
+   41   copy_address_range_state(byte loop)
+   42   check_writable
+   43   check_writable(byte loop)
+   44   check_readable
+   45   check_readable(byte loop)
+   46   check_readable_asciiz
+   47   check_readable_asciiz(byte loop)
+
+   50   make_aligned_word_NOACCESS
+   51   make_aligned_word_WRITABLE
+
+   60   helperc_LOADV4
+   61   helperc_STOREV4
+   62   helperc_LOADV2
+   63   helperc_STOREV2
+   64   helperc_LOADV1
+   65   helperc_STOREV1
+
+   70   rim_rd_V4_SLOWLY
+   71   rim_wr_V4_SLOWLY
+   72   rim_rd_V2_SLOWLY
+   73   rim_wr_V2_SLOWLY
+   74   rim_rd_V1_SLOWLY
+   75   rim_wr_V1_SLOWLY
+
+   80   fpu_read
+   81   fpu_read aligned 4
+   82   fpu_read aligned 8
+   83   fpu_read 2
+   84   fpu_read 10
+
+   85   fpu_write
+   86   fpu_write aligned 4
+   87   fpu_write aligned 8
+   88   fpu_write 2
+   89   fpu_write 10
+
+   90   fpu_read_check_SLOWLY
+   91   fpu_read_check_SLOWLY(byte loop)
+   92   fpu_write_check_SLOWLY
+   93   fpu_write_check_SLOWLY(byte loop)
+
+   100  is_plausible_stack_addr
+   101  handle_esp_assignment
+   102  handle_esp_assignment(-4)
+   103  handle_esp_assignment(+4)
+   104  handle_esp_assignment(-12)
+   105  handle_esp_assignment(-8)
+   106  handle_esp_assignment(+16)
+   107  handle_esp_assignment(+12)
+   108  handle_esp_assignment(0)
+   109  handle_esp_assignment(+8)
+   110  handle_esp_assignment(-16)
+   111  handle_esp_assignment(+20)
+   112  handle_esp_assignment(-20)
+   113  handle_esp_assignment(+24)
+   114  handle_esp_assignment(-24)
+
+   120  vg_handle_esp_assignment_SLOWLY
+   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
+   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
+   123  vg_handle_esp_assignment_SLOWLY(normal)
+   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
+*/
+
+/*------------------------------------------------------------*/
+/*--- Function declarations.                               ---*/
+/*------------------------------------------------------------*/
+
+/* Slow-path handlers, defined later in this file. */
+static void vgmext_ACCESS4_SLOWLY ( Addr a );
+static void vgmext_ACCESS2_SLOWLY ( Addr a );
+static void vgmext_ACCESS1_SLOWLY ( Addr a );
+static void fpu_ACCESS_check_SLOWLY ( Addr addr, Int size );
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+/* One A (addressibility) bit per byte of a 64KB chunk:
+   64K bits == 8192 bytes. */
+typedef 
+   struct {
+      UChar abits[8192];
+   }
+   AcSecMap;
+
+/* 2^18 entries rather than 2^16: the top 3/4 permanently point at
+   the distinguished map, supporting the combined alignment+address
+   check described in the big comment above. */
+static AcSecMap* primary_map[ /*65536*/ 262144 ];
+static AcSecMap  distinguished_secondary_map;
+
+#define IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+/* Lazily replace the (read-only, shared) distinguished map with a
+   freshly allocated secondary map for addr's 64KB chunk, so the
+   chunk's A bits can be written. */
+#define ENSURE_MAPPABLE(addr,caller)                                   \
+   do {                                                                \
+      if (IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller); \
+         /* VG_(printf)("new 2map because of %p\n", addr); */          \
+      }                                                                \
+   } while(0)
+
+/* Bit-array helpers: set / clear / test bit iii_p of byte array aaa_p. */
+#define BITARR_SET(aaa_p,iii_p)                         \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] |= (1 << (iii & 7));                \
+   } while (0)
+
+#define BITARR_CLEAR(aaa_p,iii_p)                       \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
+   } while (0)
+
+#define BITARR_TEST(aaa_p,iii_p)                        \
+      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
+               & (1 << (((UInt)iii_p) & 7))))           \
+
+
+/* A-bit convention: 0 == addressible, 1 == not addressible. */
+#define VGM_BIT_VALID      0
+#define VGM_BIT_INVALID    1
+
+#define VGM_NIBBLE_VALID   0
+#define VGM_NIBBLE_INVALID 0xF
+
+#define VGM_BYTE_VALID     0
+#define VGM_BYTE_INVALID   0xFF
+
+#define VGM_WORD_VALID     0
+#define VGM_WORD_INVALID   0xFFFFFFFF
+
+#define VGM_EFLAGS_VALID   0xFFFFFFFE
+#define VGM_EFLAGS_INVALID 0xFFFFFFFF     /* not used */
+
+
+/* Initialise the two-level map: the distinguished secondary marks
+   every byte inaccessible, and all 2^18 primary entries point at
+   it. */
+static void init_shadow_memory ( void )
+{
+   Int i;
+
+   for (i = 0; i < 8192; i++)             /* Invalid address */
+      distinguished_secondary_map.abits[i] = VGM_BYTE_INVALID; 
+
+   /* These entries gradually get overwritten as the used address
+      space expands. */
+   for (i = 0; i < 65536; i++)
+      primary_map[i] = &distinguished_secondary_map;
+
+   /* These ones should never change; it's a bug in Valgrind if they do. */
+   for (i = 65536; i < 262144; i++)
+      primary_map[i] = &distinguished_secondary_map;
+}
+
+/* Called by the core after command-line processing; nothing to do
+   for this skin. */
+void SK_(post_clo_init) ( void )
+{
+}
+
+/* Skin finalisation: print allocator stats, hint about -v and
+   --leak-check at low verbosity, optionally run the leak detector,
+   then dump profiling counters. */
+void SK_(fini) ( void )
+{
+   VG_(print_malloc_stats)();
+
+   if (VG_(clo_verbosity) == 1) {
+      if (!SK_(clo_leak_check))
+         VG_(message)(Vg_UserMsg, 
+             "For a detailed leak analysis,  rerun with: --leak-check=yes");
+
+      VG_(message)(Vg_UserMsg, 
+                   "For counts of detected errors, rerun with: -v");
+   }
+   if (SK_(clo_leak_check)) SK_(detect_memory_leaks)();
+
+   done_prof_mem();
+}
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map. */
+
+static AcSecMap* alloc_secondary_map ( __attribute__ ((unused)) 
+                                       Char* caller )
+{
+   AcSecMap* map;
+   UInt  i;
+   PROF_EVENT(10);
+
+   /* Mark all bytes as invalid access and invalid value. */
+
+   /* An AcSecMap (8192 bytes) occupies a whole number of pages -- 2,
+      assuming the usual 4KB page size -- although this isn't
+      important, so the following assert is spurious.  (An earlier
+      comment here claimed `18 pages'; that matches a larger
+      Memcheck-style SecMap, not this 8192-byte one.) */
+   vg_assert(0 == (sizeof(AcSecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(AcSecMap), caller );
+
+   for (i = 0; i < 8192; i++)
+      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
+
+   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
+   return map;
+}
+
+
+/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
+
+/* Return the A bit for address a: VGM_BIT_VALID (0) if addressible,
+   VGM_BIT_INVALID (1) otherwise.  Read-only, so the distinguished
+   map needs no special handling. */
+static __inline__ UChar get_abit ( Addr a )
+{
+   AcSecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(20);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (A)map! 0x%x\n", a);
+#  endif
+   return BITARR_TEST(sm->abits, sm_off) 
+             ? VGM_BIT_INVALID : VGM_BIT_VALID;
+}
+
+/* Set the A bit for address a, first materialising a private
+   secondary map if this 64KB chunk still shares the distinguished
+   one. */
+static __inline__ void set_abit ( Addr a, UChar abit )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(22);
+   ENSURE_MAPPABLE(a, "set_abit");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   if (abit) 
+      BITARR_SET(sm->abits, sm_off);
+   else
+      BITARR_CLEAR(sm->abits, sm_off);
+}
+
+
+/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
+
+/* Return, in the low nibble of the result, the 4 A bits for the
+   4-aligned word at a.  `(a & 4)' selects the high or low nibble of
+   the abits byte covering this 8-byte group. */
+static __inline__ UChar get_abits4_ALIGNED ( Addr a )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   abits8;
+   PROF_EVENT(24);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   abits8 = sm->abits[sm_off >> 3];
+   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+   abits8 &= 0x0F;
+   return abits8;
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Setting permissions over address ranges.             ---*/
+/*------------------------------------------------------------*/
+
+/* Set the A bit of every byte in [a, a+len) to example_a_bit
+   (VGM_BIT_VALID or VGM_BIT_INVALID).  The main loop writes a whole
+   abits byte (8 addresses) per iteration once `a' is 8-aligned;
+   ragged head and tail are handled a byte at a time via set_abit. */
+static void set_address_range_perms ( Addr a, UInt len, 
+                                      UInt example_a_bit )
+{
+   UChar     abyte8;
+   UInt      sm_off;
+   AcSecMap* sm;
+
+   PROF_EVENT(30);
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000) {
+      VG_(message)(Vg_UserMsg, 
+                   "Warning: set address range perms: "
+                   "large range %u, a %d",
+                   len, example_a_bit );
+   }
+
+   VGP_PUSHCC(VgpSetMem);
+
+   /* Requests to change permissions of huge address ranges may
+      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
+      far all legitimate requests have fallen beneath that size. */
+   /* 4 Mar 02: this is just stupid; get rid of it. */
+   /* vg_assert(len < 30000000); */
+
+   /* Check the permissions make sense. */
+   vg_assert(example_a_bit == VGM_BIT_VALID 
+             || example_a_bit == VGM_BIT_INVALID);
+
+   /* In order that we can charge through the address space at 8
+      bytes/main-loop iteration, make up some perms. */
+   abyte8 = (example_a_bit << 7)
+            | (example_a_bit << 6)
+            | (example_a_bit << 5)
+            | (example_a_bit << 4)
+            | (example_a_bit << 3)
+            | (example_a_bit << 2)
+            | (example_a_bit << 1)
+            | (example_a_bit << 0);
+
+#  ifdef VG_DEBUG_MEMORY
+   /* Do it the simple, slow way: one byte at a time.  Bug fix: this
+      loop used to also call set_vbyte(a, vbyte), but AddrCheck tracks
+      no V bits and neither `set_vbyte' nor `vbyte' exists in this
+      skin, so the VG_DEBUG_MEMORY build could not compile. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }
+
+#  else
+   /* Slowly do parts preceding 8-byte alignment. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      if ((a % 8) == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }   
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0);
+
+   /* Once aligned, go fast: one whole abits byte per 8 addresses. */
+   while (True) {
+      PROF_EVENT(32);
+      if (len < 8) break;
+      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
+      sm = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      sm->abits[sm_off >> 3] = abyte8;
+      a += 8;
+      len -= 8;
+   }
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0 && len < 8);
+
+   /* Finish the upper fragment. */
+   while (True) {
+      PROF_EVENT(33);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      a++;
+      len--;
+   }   
+#  endif
+
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Set permissions for address ranges ... */
+
+/* Mark [a, a+len) as not addressible. */
+void SK_(make_noaccess) ( Addr a, UInt len )
+{
+   PROF_EVENT(35);
+   DEBUG("SK_(make_noaccess)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) as addressible. */
+void SK_(make_accessible) ( Addr a, UInt len )
+{
+   PROF_EVENT(36);
+   DEBUG("SK_(make_accessible)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID );
+}
+
+/* Block-copy permissions (needed for implementing realloc()). */
+
+/* Copy the A bits of len bytes from src to dst, one byte at a
+   time. */
+static void copy_address_range_state ( Addr src, Addr dst, UInt len )
+{
+   UInt i;
+
+   DEBUG("copy_address_range_state\n");
+
+   PROF_EVENT(40);
+   for (i = 0; i < len; i++) {
+      UChar abit  = get_abit ( src+i );
+      PROF_EVENT(41);
+      set_abit ( dst+i, abit );
+   }
+}
+
+
+/* Check permissions for address range.  If inadequate permissions
+   exist, *bad_addr is set to the offending address, so the caller can
+   know what it is. */
+
+/* True iff every byte of [a, a+len) is addressible; on failure the
+   first bad address is stored via *bad_addr (if non-NULL).  AddrCheck
+   keeps only a single A bit per byte, so `writable' and `readable'
+   are the same test here. */
+Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+   PROF_EVENT(42);
+   for (i = 0; i < len; i++) {
+      PROF_EVENT(43);
+      abit = get_abit(a);
+      if (abit == VGM_BIT_INVALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+/* Equivalent to SK_(check_writable): only addressibility is
+   tracked. */
+Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+
+   PROF_EVENT(44);
+   DEBUG("SK_(check_readable)\n");
+   for (i = 0; i < len; i++) {
+      abit  = get_abit(a);
+      PROF_EVENT(45);
+      if (abit != VGM_BIT_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so. */
+
+/* Walk forward from a, checking each byte's A bit BEFORE
+   dereferencing it to look for the terminating NUL.  Returns True on
+   a fully-addressible string; False (with *bad_addr set, if
+   non-NULL) on the first bad byte. */
+Bool SK_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+{
+   UChar abit;
+   PROF_EVENT(46);
+   DEBUG("SK_(check_readable_asciiz)\n");
+   while (True) {
+      PROF_EVENT(47);
+      abit  = get_abit(a);
+      if (abit != VGM_BIT_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      /* Ok, a is safe to read. */
+      if (* ((UChar*)a) == 0) return True;
+      a++;
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Memory event handlers                                ---*/
+/*------------------------------------------------------------*/
+
+/* Setting permissions for aligned words.  This supports fast stack
+   operations. */
+
+/* Mark the 4-aligned range [a, a+len) not addressible, one word at a
+   time, by OR-ing a nibble of 1s into each word's A bits. */
+static void make_noaccess_aligned ( Addr a, UInt len )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(50);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_noaccess_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (1s). */
+      sm->abits[sm_off >> 3] |= mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Mark the 4-aligned range [a, a+len) addressible, one word at a
+   time, by AND-ing away a nibble of each word's A bits. */
+static void make_writable_aligned ( Addr a, UInt len )
+{
+   AcSecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(51);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_writable_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s at the positions we wish to CLEAR, ie.
+         make valid (0s).  (The old comment said `invalid', which
+         described the other function.) */
+      sm->abits[sm_off >> 3] &= ~mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+
+/* Check that [base, base+size) is writable on behalf of the core; if
+   not, record an error appropriate to the requesting core part
+   (syscall param error, or core-mem error for pthread/signal
+   machinery). */
+static
+void check_is_writable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   /* VG_(message)(Vg_DebugMsg,"check is writable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_writable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/True, s );
+         break;
+
+      case Vg_CorePThread:
+      case Vg_CoreSignal:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/True, s );
+         break;
+
+      default:
+         /* Bug fix: the panic message previously said
+            check_is_readable, obscuring which checker actually
+            failed. */
+         VG_(panic)("check_is_writable: Unknown or unexpected CorePart");
+      }
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Check that [base, base+size) is readable on behalf of the core; if
+   not, record an error appropriate to the requesting core part.  A
+   bad Vg_CoreTranslate address becomes a jump error. */
+static
+void check_is_readable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{     
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+   
+   /* VG_(message)(Vg_DebugMsg,"check is readable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_readable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/False, s );
+         break;
+      
+      case Vg_CorePThread:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/False, s );
+         break;
+
+      /* If we're being asked to jump to a silly address, record an error 
+         message before potentially crashing the entire system. */
+      case Vg_CoreTranslate:
+         SK_(record_jump_error)( tst, bad_addr );
+         break;
+
+      default:
+         VG_(panic)("check_is_readable: Unknown or unexpected CorePart");
+      }
+   }
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Check a NUL-terminated string at str is readable; only ever used
+   for syscall params (asserted).  Records a param error on the first
+   bad byte. */
+static
+void check_is_readable_asciiz ( CorePart part, ThreadState* tst,
+                                Char* s, UInt str )
+{
+   Bool ok = True;
+   Addr bad_addr;
+   /* VG_(message)(Vg_DebugMsg,"check is readable asciiz: 0x%x",str); */
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   vg_assert(part == Vg_CoreSysCall);
+   ok = SK_(check_readable_asciiz) ( (Addr)str, &bad_addr );
+   if (!ok) {
+      SK_(record_param_error) ( tst, bad_addr, /*is_writable =*/False, s );
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* New-memory event at startup: permissions are ignored and the range
+   simply made accessible. */
+static
+void addrcheck_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   DEBUG("new_mem_startup(%p, %u, rr=%u, ww=%u, xx=%u)\n", a,len,rr,ww,xx);
+   SK_(make_accessible)(a, len);
+}
+
+/* New heap memory: accessible regardless of is_inited (AddrCheck has
+   no notion of definedness). */
+static
+void addrcheck_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   SK_(make_accessible)(a, len);
+}
+
+/* Permission change (e.g. mprotect): any of r/w/x means accessible;
+   none means no-access. */
+static
+void addrcheck_set_perms (Addr a, UInt len, 
+                         Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   DEBUG("addrcheck_set_perms(%p, %u, nn=%u, rr=%u ww=%u, xx=%u)\n",
+                              a, len, nn, rr, ww, xx);
+   if (rr || ww || xx) {
+      SK_(make_accessible)(a, len);
+   } else {
+      SK_(make_noaccess)(a, len);
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Functions called directly from generated code.       ---*/
+/*------------------------------------------------------------*/
+
+/* Rotate a 32-bit word by 16: moves the low address bits above the
+   16 high bits, implementing the f(addr) primary-map index described
+   in the big comment earlier. */
+static __inline__ UInt rotateRight16 ( UInt x )
+{
+   /* Amazingly, gcc turns this into a single rotate insn. */
+   return (x >> 16) | (x << 16);
+}
+
+
+/* Plain primary-map index (addr >> 16), for accesses with no
+   alignment constraint. */
+static __inline__ UInt shiftRight16 ( UInt x )
+{
+   return x >> 16;
+}
+
+
+/* Read/write 1/2/4 sized V bytes, and emit an address error if
+   needed. */
+
+/* SK_(helperc_ACCESS{1,2,4}) handle the common case fast.
+   Under all other circumstances, it defers to the relevant _SLOWLY
+   function, which can handle all situations.
+*/
+
+/* 4-byte access check.  rotateRight16 folds address bits 0-1 into
+   the primary-map index; a misaligned address therefore lands in the
+   top 3/4 of primary_map (always the distinguished map), forcing the
+   slow path, which sorts out genuine vs. alignment-only failures. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS4) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS4_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   AcSecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(60);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible.  So just return. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS4_SLOWLY(a);
+   }
+#  endif
+}
+
+/* 2-byte access check.  Only address bit 0 is folded in (mask
+   0x1FFFF keeps bit 16); the fast path also requires the whole
+   8-byte abits group to be valid. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS2) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS2_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   AcSecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(62);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS2_SLOWLY(a);
+   }
+#  endif
+}
+
+/* 1-byte access check: no alignment constraint, so the plain
+   (addr >> 16) index is used. */
+__attribute__ ((regparm(1)))
+void SK_(helperc_ACCESS1) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_ACCESS1_SLOWLY(a);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   AcSecMap* sm   = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(64);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      return;
+   } else {
+      /* Slow but general case. */
+      vgmext_ACCESS1_SLOWLY(a);
+   }
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Fallback functions to handle cases that the above    ---*/
+/*--- VG_(helperc_ACCESS{1,2,4}) can't manage.             ---*/
+/*------------------------------------------------------------*/
+
+/* General 4-byte check: per-byte A bits, with the partial-load
+   concession controlled by SK_(clo_partial_loads_ok). */
+static void vgmext_ACCESS4_SLOWLY ( Addr a )
+{
+   Bool a0ok, a1ok, a2ok, a3ok;
+
+   PROF_EVENT(70);
+
+   /* First establish independently the addressibility of the 4 bytes
+      involved. */
+   a0ok = get_abit(a+0) == VGM_BIT_VALID;
+   a1ok = get_abit(a+1) == VGM_BIT_VALID;
+   a2ok = get_abit(a+2) == VGM_BIT_VALID;
+   a3ok = get_abit(a+3) == VGM_BIT_VALID;
+
+   /* Now distinguish 3 cases */
+
+   /* Case 1: the address is completely valid, so:
+      - no addressing error
+   */
+   if (a0ok && a1ok && a2ok && a3ok) {
+      return;
+   }
+
+   /* Case 2: the address is completely invalid.  
+      - emit addressing error
+   */
+   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
+   if (!SK_(clo_partial_loads_ok) 
+       || ((a & 3) != 0)
+       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
+      sk_record_address_error( a, 4, False );
+      return;
+   }
+
+   /* Case 3: the address is partially valid.  
+      - no addressing error
+      Case 3 is only allowed if SK_(clo_partial_loads_ok) is True
+      (which is the default), and the address is 4-aligned.  
+      If not, Case 2 will have applied.
+   */
+   vg_assert(SK_(clo_partial_loads_ok));
+   {
+      return;
+   }
+}
+
+/* General 2-byte check: report one error if either byte is bad. */
+static void vgmext_ACCESS2_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(72);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      sk_record_address_error( a, 2, False );
+   }
+}
+
+/* General 1-byte check. */
+static void vgmext_ACCESS1_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(74);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      sk_record_address_error( a, 1, False );
+   }
+}
+
+
+/* ---------------------------------------------------------------------
+   FPU load and store checks, called from generated code.
+   ------------------------------------------------------------------ */
+
+/* Check an FPU access of `size' bytes at addr.  Fast paths exist for
+   4-aligned 4- and 8-byte accesses (one or two abits-byte compares);
+   everything else goes to fpu_ACCESS_check_SLOWLY.  Sizes other than
+   2/4/8/10/28/108 panic. */
+__attribute__ ((regparm(2)))
+void SK_(fpu_ACCESS_check) ( Addr addr, Int size )
+{
+   /* Ensure the read area is both addressible and valid (ie,
+      readable).  If there's an address error, don't report a value
+      error too; but if there isn't an address error, check for a
+      value error. 
+
+      Try to be reasonably fast on the common case; wimp out and defer
+      to fpu_ACCESS_check_SLOWLY for everything else.  */
+
+   AcSecMap* sm;
+   UInt    sm_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(80);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_ACCESS_check_SLOWLY ( addr, size );
+#  else
+
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(81);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible. */
+      return;
+     slow4:
+      fpu_ACCESS_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(82);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible. */
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      /* Both halves properly aligned and addressible. */
+      return;
+     slow8:
+      fpu_ACCESS_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(83);
+      fpu_ACCESS_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   if (size == 10) {
+      PROF_EVENT(84);
+      fpu_ACCESS_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(84); /* XXX assign correct event number */
+      fpu_ACCESS_check_SLOWLY ( addr, size );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("fpu_ACCESS_check: unhandled size");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Slow, general cases for FPU access checks.
+   ------------------------------------------------------------------ */
+
+/* Check addressibility of [addr, addr+size) one byte at a time and
+   report a single address error covering the range if any byte is
+   bad.  Consistency fix: declared `static' near the top of the file
+   but previously defined without a storage class; `static' added so
+   declaration and definition agree. */
+static void fpu_ACCESS_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Bool aerr = False;
+   PROF_EVENT(90);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(91);
+      if (get_abit(addr+i) != VGM_BIT_VALID)
+         aerr = True;
+   }
+
+   if (aerr) {
+      sk_record_address_error( addr, size, False );
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks info                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Each shadow chunk carries one word of skin state: the ExeContext
+   of the allocation (overwritten with the free context by
+   SK_(alt_free)). */
+static __inline__
+void set_where( ShadowChunk* sc, ExeContext* ec )
+{
+   sc->skin_extra[0] = (UInt)ec;
+}
+
+static __inline__
+ExeContext *get_where( ShadowChunk* sc )
+{
+   return (ExeContext*)sc->skin_extra[0];
+}
+
+/* Called by the core when a chunk is allocated: record where. */
+void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst )
+{
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+}
+
+/*------------------------------------------------------------*/
+/*--- Postponing free()ing                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Holds blocks after freeing. */
+static ShadowChunk* vg_freed_list_start   = NULL;   /* oldest block */
+static ShadowChunk* vg_freed_list_end     = NULL;   /* newest block */
+static Int          vg_freed_list_volume  = 0;      /* total bytes queued */
+
+/* Count the blocks currently on the freed queue (debug aid; unused
+   in normal builds). */
+static __attribute__ ((unused))
+       Int count_freelist ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n++;
+   return n;
+}
+
+/* Assert that vg_freed_list_volume matches the sum of queued block
+   sizes (debug aid; unused in normal builds). */
+static __attribute__ ((unused))
+       void freelist_sanity ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   /* VG_(printf)("freelist sanity\n"); */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n += sc->size;
+   vg_assert(n == vg_freed_list_volume);
+}
+
+/* Put a shadow chunk on the freed blocks queue, possibly freeing up
+   some of the oldest blocks in the queue at the same time. */
+static void add_to_freed_queue ( ShadowChunk* sc )
+{
+   ShadowChunk* sc1;
+
+   /* Put it at the end of the freed list */
+   if (vg_freed_list_end == NULL) {
+      vg_assert(vg_freed_list_start == NULL);
+      vg_freed_list_end = vg_freed_list_start = sc;
+      vg_freed_list_volume = sc->size;
+   } else {    
+      vg_assert(vg_freed_list_end->next == NULL);
+      vg_freed_list_end->next = sc;
+      vg_freed_list_end = sc;
+      vg_freed_list_volume += sc->size;
+   }
+   sc->next = NULL;
+
+   /* Release enough of the oldest blocks to bring the free queue
+      volume below vg_clo_freelist_vol. */
+   
+   while (vg_freed_list_volume > SK_(clo_freelist_vol)) {
+      /* freelist_sanity(); */
+      vg_assert(vg_freed_list_start != NULL);
+      vg_assert(vg_freed_list_end != NULL);
+
+      sc1 = vg_freed_list_start;
+      vg_freed_list_volume -= sc1->size;
+      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
+      vg_assert(vg_freed_list_volume >= 0);
+
+      /* Unlink the head; handle the single-element case where head
+         and tail coincide. */
+      if (vg_freed_list_start == vg_freed_list_end) {
+         vg_freed_list_start = vg_freed_list_end = NULL;
+      } else {
+         vg_freed_list_start = sc1->next;
+      }
+      sc1->next = NULL; /* just paranoia */
+      VG_(freeShadowChunk) ( sc1 );
+   }
+}
+
+/* Return the first shadow chunk on the freed-blocks queue satisfying
+   the predicate p, or NULL if none does.  Used to describe faulting
+   addresses that fall inside recently-freed blocks. */
+ShadowChunk* SK_(any_matching_freed_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
+{
+   ShadowChunk* sc;
+
+   /* Linear scan; if the queue is empty (e.g. --freelist-vol=0) this
+      trivially returns NULL. */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      if (p(sc))
+         return sc;
+
+   return NULL;
+}
+
+/* Alternative free() handler (needs->alternative_free): instead of
+   releasing the chunk immediately, tag it with the free site and park
+   it on the freed-blocks queue. */
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   /* Record where freed */
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+
+   /* Put it out of harm's way for a while. */
+   add_to_freed_queue ( sc );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Our instrumenter                                     ---*/
+/*------------------------------------------------------------*/
+
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define newTemp   VG_(getNewTemp)
+
+/* Instrument one translated basic block: for every LOAD, STORE, FPU_R
+   and FPU_W, insert a C call to the matching SK_(helperc_ACCESS*) /
+   SK_(fpu_ACCESS_check) helper, passing the effective address (and,
+   for FPU ops, the size).  All other uinstrs are copied through
+   unchanged. */
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+/* Use this rather than eg. -1 because it's a UInt. */
+/* NOTE(review): INVALID_DATA_SIZE appears to be unused in this
+   function -- possibly a leftover from the skin this was derived
+   from; confirm before removing. */
+#define INVALID_DATA_SIZE   999999
+
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u_in;
+   Int         t_addr, t_size;
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+
+      t_addr = t_size = INVALID_TEMPREG;
+      u_in = &cb_in->instrs[i];
+
+      switch (u_in->opcode) {
+         case NOP:  case CALLM_E:  case CALLM_S:
+            break;
+
+         /* For memory-ref instrs, copy the data_addr into a temporary to be
+          * passed to the access-check helper at the end of the instruction.
+          */
+         case LOAD: 
+            t_addr = u_in->val1; 
+            goto do_LOAD_or_STORE;
+         case STORE: t_addr = u_in->val2;
+            goto do_LOAD_or_STORE;
+           do_LOAD_or_STORE:
+            /* CCALL with one arg (the address temp), one reg-arg, no
+               return value; helper chosen by access size. */
+            uInstr1(cb, CCALL, 0, TempReg, t_addr);
+            switch (u_in->size) {
+               case 4: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS4), 
+                                               1, 1, False );
+                  break;
+               case 2: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS2), 
+                                               1, 1, False );
+                  break;
+               case 1: VG_(setCCallFields)(cb, (Addr)&SK_(helperc_ACCESS1), 
+                                               1, 1, False );
+                  break;
+               default: 
+                  VG_(panic)("addrcheck::SK_(instrument):LOAD/STORE");
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case FPU_R:
+         case FPU_W:
+            /* FPU accesses come in many sizes, so the size is passed
+               to the helper as a second, literal, argument. */
+            t_addr = u_in->val2;
+            t_size = newTemp(cb);
+	    uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_size);
+	    uLiteral(cb, u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, t_addr, TempReg, t_size);
+            VG_(setCCallFields)(cb, (Addr)&SK_(fpu_ACCESS_check), 
+                                               2, 2, False );
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         default:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Low-level address-space scanning, for the leak       ---*/
+/*--- detector.                                            ---*/
+/*------------------------------------------------------------*/
+
+/* longjmp target used to escape from a faulting probe during the
+   address-space scan below. */
+static 
+jmp_buf memscan_jmpbuf;
+
+/* Temporary SIGSEGV/SIGBUS handler installed for the duration of the
+   scan: bail straight back to the setjmp point, abandoning the page
+   being probed. */
+static
+void vg_scan_all_valid_memory_sighandler ( Int sigNo )
+{
+   __builtin_longjmp(memscan_jmpbuf, 1);
+}
+
+/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
+   space and pass the addresses and values of all addressible,
+   defined, aligned words to notify_word.  This is the basis for the
+   leak detector.  Returns the number of calls made to notify_word.
+
+   Strategy: probe the first word of each page under a temporary
+   SIGSEGV/SIGBUS handler; if the probe faults, the handler longjmps
+   back here and the page is skipped. */
+UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
+{
+   /* All volatile, because some gccs seem paranoid about longjmp(). */
+   volatile UInt res, numPages, page, primaryMapNo, nWordsNotified;
+   volatile Addr pageBase, addr;
+   volatile AcSecMap* sm;
+   volatile UChar abits;
+   volatile UInt page_first_word;
+
+   vki_ksigaction sigbus_saved;
+   vki_ksigaction sigbus_new;
+   vki_ksigaction sigsegv_saved;
+   vki_ksigaction sigsegv_new;
+   vki_ksigset_t  blockmask_saved;
+   vki_ksigset_t  unblockmask_new;
+
+   /* Temporarily install a new sigsegv and sigbus handler, and make
+      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
+      first two can never be blocked anyway?)  */
+
+   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigbus_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
+   vg_assert(res == 0);
+
+   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigsegv_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
+   /* The `0+0...' forms below appear intended only to make each
+      assertion site textually distinct. */
+   vg_assert(res == 0+0);
+
+   res =  VG_(ksigemptyset)( &unblockmask_new );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
+   vg_assert(res == 0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
+   vg_assert(res == 0+0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
+   vg_assert(res == 0+0+0+0+0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
+   vg_assert(res == 0+0+0+0+0+0);
+
+   /* The signal handlers are installed.  Actually do the memory scan. */
+   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
+   vg_assert(numPages == 1048576);
+   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
+
+   nWordsNotified = 0;
+
+   for (page = 0; page < numPages; page++) {
+      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
+      primaryMapNo = pageBase >> 16;
+      sm = primary_map[primaryMapNo];
+      /* Pages still covered by the distinguished (all-invalid)
+         secondary map cannot contain valid words; skip cheaply. */
+      if (IS_DISTINGUISHED_SM(sm)) continue;
+      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
+         /* try this ... */
+         page_first_word = * (volatile UInt*)pageBase;
+         /* we get here if we didn't get a fault */
+         /* Scan the page */
+         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
+            abits  = get_abits4_ALIGNED(addr);
+            if (abits == VGM_NIBBLE_VALID) {
+               nWordsNotified++;
+               notify_word ( addr, *(UInt*)addr );
+	    }
+         }
+      } else {
+         /* We get here if reading the first word of the page caused a
+            fault, which in turn caused the signal handler to longjmp.
+            Ignore this page. */
+         if (0)
+         VG_(printf)(
+            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
+            (void*)pageBase 
+         );
+      }
+   }
+
+   /* Restore signal state to whatever it was before. */
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
+   vg_assert(res == 0 +0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
+   vg_assert(res == 0 +0 +0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
+   vg_assert(res == 0 +0 +0 +0);
+
+   return nWordsNotified;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
+/*------------------------------------------------------------*/
+
+/* A block is either 
+   -- Proper-ly reached; a pointer to its start has been found
+   -- Interior-ly reached; only an interior pointer to it has been found
+   -- Unreached; so far, no pointers to any part of it have been found. 
+*/
+typedef 
+   enum { Unreached, Interior, Proper } 
+   Reachedness;
+
+/* A block record, used for generating err msgs.  Blocks sharing an
+   allocation point and a reachability class are commoned up into a
+   single LossRecord. */
+typedef
+   struct _LossRecord {
+      struct _LossRecord* next;
+      /* Where these lost blocks were allocated. */
+      ExeContext*  allocated_at;
+      /* Their reachability. */
+      Reachedness  loss_mode;
+      /* Number of blocks and total # bytes involved. */
+      UInt         total_bytes;
+      UInt         num_blocks;
+   }
+   LossRecord;
+
+
+/* Find the i such that ptr points at or inside the block described by
+   shadows[i].  Return -1 if none found.  This assumes that shadows[]
+   has been sorted on the ->data field. */
+
+#ifdef VG_DEBUG_LEAKCHECK
+/* Reference implementation (linear scan), used only to sanity-check
+   the fast binary-search version below. */
+static Int find_shadow_for_OLD ( Addr          ptr, 
+                                 ShadowChunk** shadows,
+                                 Int           n_shadows )
+
+{
+   Int  i;
+   Addr a_lo, a_hi;
+   PROF_EVENT(70);
+   for (i = 0; i < n_shadows; i++) {
+      PROF_EVENT(71);
+      /* [a_lo, a_hi] is the inclusive payload range of block i. */
+      a_lo = shadows[i]->data;
+      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
+      if (a_lo <= ptr && ptr <= a_hi)
+         return i;
+   }
+   return -1;
+}
+#endif
+
+
+/* Binary search over shadows[] (sorted on ->data) for the block whose
+   payload range contains ptr; returns its index, or -1 if none. */
+static Int find_shadow_for ( Addr          ptr, 
+                             ShadowChunk** shadows,
+                             Int           n_shadows )
+{
+   Addr a_mid_lo, a_mid_hi;
+   Int lo, mid, hi, retVal;
+   PROF_EVENT(70);
+   /* VG_(printf)("find shadow for %p = ", ptr); */
+   retVal = -1;
+   lo = 0;
+   hi = n_shadows-1;
+   while (True) {
+      PROF_EVENT(71);
+
+      /* invariant: current unsearched space is from lo to hi,
+         inclusive. */
+      if (lo > hi) break; /* not found */
+
+      mid      = (lo + hi) / 2;
+      /* Inclusive payload range of the middle block. */
+      a_mid_lo = shadows[mid]->data;
+      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
+
+      if (ptr < a_mid_lo) {
+         hi = mid-1;
+         continue;
+      } 
+      if (ptr > a_mid_hi) {
+         lo = mid+1;
+         continue;
+      }
+      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
+      retVal = mid;
+      break;
+   }
+
+#  ifdef VG_DEBUG_LEAKCHECK
+   /* Cross-check against the slow linear version. */
+   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
+#  endif
+   /* VG_(printf)("%d\n", retVal); */
+   return retVal;
+}
+
+
+
+/* Sort shadows[0 .. n_shadows-1] in ascending order of ->data, using
+   shellsort with Knuth's gap sequence (h' = 3h+1, visible in incs[]). */
+static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
+{
+   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                      9841, 29524, 88573, 265720,
+                      797161, 2391484 };
+   Int          lo = 0;
+   Int          hi = n_shadows-1;
+   Int          i, j, h, bigN, hp;
+   ShadowChunk* v;
+
+   PROF_EVENT(72);
+   bigN = hi - lo + 1; if (bigN < 2) return;
+   /* Find the largest gap smaller than the array size. */
+   hp = 0; while (incs[hp] < bigN) hp++; hp--;
+
+   for (; hp >= 0; hp--) {
+      PROF_EVENT(73);
+      h = incs[hp];
+      i = lo + h;
+      /* Insertion sort over elements h apart. */
+      while (1) {
+         PROF_EVENT(74);
+         if (i > hi) break;
+         v = shadows[i];
+         j = i;
+         while (shadows[j-h]->data > v->data) {
+            PROF_EVENT(75);
+            shadows[j] = shadows[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         shadows[j] = v;
+         i++;
+      }
+   }
+}
+
+/* Globals, for the callback used by SK_(detect_memory_leaks).  The
+   notify_word callback takes no closure argument, so it picks these
+   up from file scope. */
+
+static ShadowChunk** vglc_shadows;          /* array of live chunks, sorted on ->data */
+static Int           vglc_n_shadows;        /* number of entries in vglc_shadows */
+static Reachedness*  vglc_reachedness;      /* one classification per chunk */
+static Addr          vglc_min_mallocd_addr; /* lowest payload address of any chunk */
+static Addr          vglc_max_mallocd_addr; /* highest payload address (inclusive) */
+
+/* Callback for VG_(scan_all_valid_memory): treat word_at_a (the word
+   found at address a) as a potential pointer, and if it points at or
+   into a malloc'd block, upgrade that block's reachedness. */
+static 
+void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
+{
+   Int  sh_no;
+   Addr ptr;
+
+   /* Rule out some known causes of bogus pointers.  Mostly these do
+      not cause much trouble because only a few false pointers can
+      ever lurk in these places.  This mainly stops it reporting that
+      blocks are still reachable in stupid test programs like this
+
+         int main (void) { char* a = malloc(100); return 0; }
+
+      which people seem inordinately fond of writing, for some reason.  
+
+      Note that this is a complete kludge.  It would be better to
+      ignore any addresses corresponding to valgrind.so's .bss and
+      .data segments, but I cannot think of a reliable way to identify
+      where the .bss segment has been put.  If you can, drop me a
+      line.  
+   */
+   if (VG_(within_stack)(a))                return;
+   if (VG_(within_m_state_static)(a))       return;
+   if (a == (Addr)(&vglc_min_mallocd_addr)) return;
+   if (a == (Addr)(&vglc_max_mallocd_addr)) return;
+
+   /* OK, let's get on and do something Useful for a change. */
+
+   ptr = (Addr)word_at_a;
+   /* Quick range filter before the binary search. */
+   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
+      /* Might be legitimate; we'll have to investigate further. */
+      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
+      if (sh_no != -1) {
+         /* Found a block at/into which ptr points. */
+         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
+         vg_assert(ptr < vglc_shadows[sh_no]->data 
+                         + vglc_shadows[sh_no]->size);
+         /* Decide whether Proper-ly or Interior-ly reached. */
+         if (ptr == vglc_shadows[sh_no]->data) {
+            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
+            vglc_reachedness[sh_no] = Proper;
+         } else {
+            /* Interior pointer: never downgrade a Proper block. */
+            if (vglc_reachedness[sh_no] == Unreached)
+               vglc_reachedness[sh_no] = Interior;
+         }
+      }
+   }
+}
+
+
+/* Top-level leak detector.  Phases:
+     1. snapshot and sort all live malloc'd blocks;
+     2. scan all valid memory, classifying each block as Unreached /
+        Interior / Proper via vg_detect_memory_leaks_notify_addr;
+     3. print summary totals;
+     4. common up blocks into LossRecords and print them, smallest
+        total first, one ExeContext per record. */
+void SK_(detect_memory_leaks) ( void )
+{
+   Int    i;
+   Int    blocks_leaked, bytes_leaked;
+   Int    blocks_dubious, bytes_dubious;
+   Int    blocks_reachable, bytes_reachable;
+   Int    n_lossrecords;
+   UInt   bytes_notified;
+   
+   LossRecord*  errlist;
+   LossRecord*  p;
+
+   PROF_EVENT(76);
+
+   /* VG_(get_malloc_shadows) allocates storage for shadows */
+   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
+   if (vglc_n_shadows == 0) {
+      vg_assert(vglc_shadows == NULL);
+      VG_(message)(Vg_UserMsg, 
+                   "No malloc'd blocks -- no leaks are possible.\n");
+      return;
+   }
+
+   VG_(message)(Vg_UserMsg, 
+                "searching for pointers to %d not-freed blocks.", 
+                vglc_n_shadows );
+   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
+
+   /* Sanity check; assert that the blocks are now in order and that
+      they don't overlap. */
+   for (i = 0; i < vglc_n_shadows-1; i++) {
+      vg_assert( ((Addr)vglc_shadows[i]->data)
+                 < ((Addr)vglc_shadows[i+1]->data) );
+      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
+                 < ((Addr)vglc_shadows[i+1]->data) );
+   }
+
+   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
+   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
+                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
+
+   vglc_reachedness 
+      = VG_(malloc)( vglc_n_shadows * sizeof(Reachedness) );
+   for (i = 0; i < vglc_n_shadows; i++)
+      vglc_reachedness[i] = Unreached;
+
+   /* Do the scan of memory. */
+   bytes_notified
+       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
+         * VKI_BYTES_PER_WORD;
+
+   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
+
+   /* Tot up the three classes. */
+   blocks_leaked    = bytes_leaked    = 0;
+   blocks_dubious   = bytes_dubious   = 0;
+   blocks_reachable = bytes_reachable = 0;
+
+   for (i = 0; i < vglc_n_shadows; i++) {
+      if (vglc_reachedness[i] == Unreached) {
+         blocks_leaked++;
+         bytes_leaked += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Interior) {
+         blocks_dubious++;
+         bytes_dubious += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Proper) {
+         blocks_reachable++;
+         bytes_reachable += vglc_shadows[i]->size;
+      }
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+
+
+   /* Common up the lost blocks so we can print sensible error
+      messages. */
+
+   n_lossrecords = 0;
+   errlist       = NULL;
+   for (i = 0; i < vglc_n_shadows; i++) {
+     
+      /* 'where' stored in 'skin_extra' field */
+      ExeContext* where = get_where ( vglc_shadows[i] );
+
+      /* Look for an existing record with the same mode and context
+         (compared at the resolution given by --leak-resolution). */
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->loss_mode == vglc_reachedness[i]
+             && VG_(eq_ExeContext) ( SK_(clo_leak_resolution),
+                                     p->allocated_at, 
+                                     where) ) {
+            break;
+	 }
+      }
+      if (p != NULL) {
+         p->num_blocks  ++;
+         p->total_bytes += vglc_shadows[i]->size;
+      } else {
+         n_lossrecords ++;
+         p = VG_(malloc)(sizeof(LossRecord));
+         p->loss_mode    = vglc_reachedness[i];
+         p->allocated_at = where;
+         p->total_bytes  = vglc_shadows[i]->size;
+         p->num_blocks   = 1;
+         p->next         = errlist;
+         errlist         = p;
+      }
+   }
+   
+   /* Print the records smallest-first; num_blocks==0 marks a record
+      as already printed (or suppressed). */
+   for (i = 0; i < n_lossrecords; i++) {
+      LossRecord* p_min = NULL;
+      UInt        n_min = 0xFFFFFFFF;
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->num_blocks > 0 && p->total_bytes < n_min) {
+            n_min = p->total_bytes;
+            p_min = p;
+         }
+      }
+      vg_assert(p_min != NULL);
+
+      if ( (!SK_(clo_show_reachable)) && p_min->loss_mode == Proper) {
+         p_min->num_blocks = 0;
+         continue;
+      }
+
+      VG_(message)(Vg_UserMsg, "");
+      VG_(message)(
+         Vg_UserMsg,
+         "%d bytes in %d blocks are %s in loss record %d of %d",
+         p_min->total_bytes, p_min->num_blocks,
+         p_min->loss_mode==Unreached ? "definitely lost" :
+            (p_min->loss_mode==Interior ? "possibly lost"
+                                        : "still reachable"),
+         i+1, n_lossrecords
+      );
+      VG_(pp_ExeContext)(p_min->allocated_at);
+      p_min->num_blocks = 0;
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
+   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+   if (!SK_(clo_show_reachable)) {
+      VG_(message)(Vg_UserMsg, 
+         "Reachable blocks (those to which a pointer was found) are not shown.");
+      VG_(message)(Vg_UserMsg, 
+         "To see them, rerun with: --show-reachable=yes");
+   }
+   VG_(message)(Vg_UserMsg, "");
+
+   /* NOTE(review): the LossRecord list (errlist) is never freed here;
+      harmless if leak-check runs once at exit, but worth confirming. */
+   VG_(free) ( vglc_shadows );
+   VG_(free) ( vglc_reachedness );
+}
+
+
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+/* Cheap consistency check: the bottom and top 64KB of the address
+   space must still be covered by the distinguished secondary map;
+   if either has been replaced, something has very probably gone
+   wrong (see comment above). */
+Bool SK_(cheap_sanity_check) ( void )
+{
+   return IS_DISTINGUISHED_SM(primary_map[0])
+          && IS_DISTINGUISHED_SM(primary_map[65535]);
+}
+
+/* Expensive consistency check, run less often than the cheap one. */
+Bool SK_(expensive_sanity_check) ( void )
+{
+   Int idx;
+
+   /* The distinguished secondary map itself must still be entirely
+      invalid -- nobody may write through it. */
+   for (idx = 0; idx < 8192; idx++) {
+      if (distinguished_secondary_map.abits[idx] != VGM_BYTE_INVALID)
+         return False;
+   }
+
+   /* Every entry in the upper 3/4 of the primary map must still point
+      at the distinguished secondary. */
+   for (idx = 65536; idx < 262144; idx++) {
+      if (primary_map[idx] != & distinguished_secondary_map)
+         return False;
+   }
+
+   return True;
+}
+      
+/* ---------------------------------------------------------------------
+   Debugging machinery (turn on to debug).  Something of a mess.
+   ------------------------------------------------------------------ */
+
+#if 0
+/* Print the value tags on the 8 integer registers & flag reg. */
+
+/* Render x as 32 '0'/'1' characters, MSB first, inserting a space
+   after each group of 8 bits.  str must have room for at least 36
+   bytes (32 digits + 3 spaces + NUL); exactly 36 are written. */
+static void uint_to_bits ( UInt x, Char* str )
+{
+   Int bit;
+   Int w = 0;
+   for (bit = 31; bit >= 0; bit--) {
+      str[w++] = ((x >> bit) & 1) ? '1' : '0';
+      if (bit == 24 || bit == 16 || bit == 8)
+         str[w++] = ' ';
+   }
+   str[w++] = 0;
+   vg_assert(w == 36);
+}
+
+/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
+   state table. */
+
+/* Print the shadow (tag) values of the 8 integer registers and the
+   flags, as bit strings.  Fix: the eflags line used the broken format
+   "efl %\n" (missing `s'), so the flag bits were never printed. */
+static void vg_show_reg_tags ( void )
+{
+   Char buf1[36];
+   Char buf2[36];
+   UInt z_eax, z_ebx, z_ecx, z_edx, 
+        z_esi, z_edi, z_ebp, z_esp, z_eflags;
+
+   /* Caution!  Reads VG_(baseBlock), not the thread state table. */
+   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
+   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
+   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
+   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
+   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
+   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
+   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
+   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
+   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   
+   uint_to_bits(z_eflags, buf1);
+   VG_(message)(Vg_DebugMsg, "efl %s\n", buf1);
+
+   uint_to_bits(z_eax, buf1);
+   uint_to_bits(z_ebx, buf2);
+   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
+
+   uint_to_bits(z_ecx, buf1);
+   uint_to_bits(z_edx, buf2);
+   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
+
+   uint_to_bits(z_esi, buf1);
+   uint_to_bits(z_edi, buf2);
+   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
+
+   uint_to_bits(z_ebp, buf1);
+   uint_to_bits(z_esp, buf2);
+   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
+}
+
+
+/* For debugging only.  Scan the address space and touch all allegedly
+   addressible words.  Useful for establishing where Valgrind's idea of
+   addressibility has diverged from what the kernel believes. */
+
+/* Deliberately empty callback: the point is merely that the scanner
+   dereferences every word it passes to us. */
+static 
+void zzzmemscan_notify_word ( Addr a, UInt w )
+{
+}
+
+/* Walk all valid memory via the no-op callback above and report how
+   many bytes were visited (4 bytes per notified word). */
+void zzzmemscan ( void )
+{
+   Int n_notifies
+      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
+   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
+}
+#endif
+
+
+
+
+#if 0
+static Int zzz = 0;
+
+/* Debug aid (dead code while the enclosing #if 0 stands): print a
+   counter, the shadow register tags, then retranslate/disassemble the
+   BB about to run.  Fix: the call was `vg_show_reg_tags( &VG_(m_shadow );'
+   -- unbalanced parentheses and a bogus argument; vg_show_reg_tags()
+   takes no arguments. */
+void show_bb ( Addr eip_next )
+{
+   VG_(printf)("[%4d] ", zzz);
+   vg_show_reg_tags();
+   VG_(translate) ( eip_next, NULL, NULL, NULL );
+}
+#endif /* 0 */
+
+/*------------------------------------------------------------*/
+/*--- Syscall wrappers                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Snapshot the cheap sanity state before a syscall; the opaque result
+   is handed back to SK_(post_syscall) for comparison. */
+void* SK_(pre_syscall)  ( ThreadId tid, UInt syscallno, Bool isBlocking )
+{
+   return (void*)(Int)SK_(cheap_sanity_check)();
+}
+
+/* Re-run the cheap sanity check after a syscall.  A transition from
+   sane to insane across the call strongly suggests the syscall's
+   effects on address-state were mis-modelled, so die noisily. */
+void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                           void* pre_result, Int res, Bool isBlocking )
+{
+   Bool was_sane = (0 != (Int)pre_result);
+   Bool is_sane  = SK_(cheap_sanity_check)();
+
+   if (was_sane && !is_sane) {
+      VG_(message)(Vg_DebugMsg, "post-syscall: ");
+      VG_(message)(Vg_DebugMsg,
+                   "probable sanity check failure for syscall number %d\n",
+                   syscallno );
+      VG_(panic)("aborting due to the above ... bye!");
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Command line processing                              ---*/
+/*------------------------------------------------------------*/
+
+/* Shadow values the core should give to registers it writes:
+   addrcheck does not track definedness, so everything is reported as
+   valid. */
+void SK_(written_shadow_regs_values)( UInt* gen_reg_value, UInt* eflags_value )
+{
+   *gen_reg_value = VGM_WORD_VALID;
+   *eflags_value  = VGM_EFLAGS_VALID;
+}
+
+/* Parse one skin-specific command line option.  Returns True if arg
+   was recognised (whether or not its value was useful), False so the
+   core can complain about an unknown option. */
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+#  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
+#  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
+
+   if      (STREQ(arg, "--partial-loads-ok=yes"))
+      SK_(clo_partial_loads_ok) = True;
+   else if (STREQ(arg, "--partial-loads-ok=no"))
+      SK_(clo_partial_loads_ok) = False;
+
+   /* --freelist-vol=N: max bytes held on the freed-blocks queue;
+      negative values are clamped to 0. */
+   else if (STREQN(15, arg, "--freelist-vol=")) {
+      SK_(clo_freelist_vol) = (Int)VG_(atoll)(&arg[15]);
+      if (SK_(clo_freelist_vol) < 0) SK_(clo_freelist_vol) = 0;
+   }
+
+   else if (STREQ(arg, "--leak-check=yes"))
+      SK_(clo_leak_check) = True;
+   else if (STREQ(arg, "--leak-check=no"))
+      SK_(clo_leak_check) = False;
+
+   else if (STREQ(arg, "--leak-resolution=low"))
+      SK_(clo_leak_resolution) = Vg_LowRes;
+   else if (STREQ(arg, "--leak-resolution=med"))
+      SK_(clo_leak_resolution) = Vg_MedRes;
+   else if (STREQ(arg, "--leak-resolution=high"))
+      SK_(clo_leak_resolution) = Vg_HighRes;
+   
+   else if (STREQ(arg, "--show-reachable=yes"))
+      SK_(clo_show_reachable) = True;
+   else if (STREQ(arg, "--show-reachable=no"))
+      SK_(clo_show_reachable) = False;
+
+   else if (STREQ(arg, "--workaround-gcc296-bugs=yes"))
+      SK_(clo_workaround_gcc296_bugs) = True;
+   else if (STREQ(arg, "--workaround-gcc296-bugs=no"))
+      SK_(clo_workaround_gcc296_bugs) = False;
+
+   else if (STREQ(arg, "--cleanup=yes"))
+      SK_(clo_cleanup) = True;
+   else if (STREQ(arg, "--cleanup=no"))
+      SK_(clo_cleanup) = False;
+
+   else
+      return False;
+
+   return True;
+
+#undef STREQ
+#undef STREQN
+}
+
+/* One-line summaries of the options accepted by
+   SK_(process_cmd_line_option) above.  Fix: removed the advertised
+   --check-addrVs option -- it is a memcheck option and is not
+   accepted by this skin's option parser. */
+Char* SK_(usage)(void)
+{
+   return
+"    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
+"    --freelist-vol=<number>   volume of freed blocks queue [1000000]\n"
+"    --leak-check=no|yes       search for memory leaks at exit? [no]\n"
+"    --leak-resolution=low|med|high\n"
+"                              amount of bt merging in leak check [low]\n"
+"    --show-reachable=no|yes   show reachable blocks in leak check? [no]\n"
+"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
+"\n"
+"    --cleanup=no|yes          improve after instrumentation? [yes]\n";
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+
+/* Skin initialisation, run before command line options are processed:
+   declare what core services this skin needs, register the compact
+   access-check helpers, hook the memory events, and initialise the
+   shadow-memory and profiling machinery. */
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   needs->name                    = "addrcheck";
+   needs->description             = "a fine-grained address checker";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+   needs->run_libc_freeres        = True;
+
+   /* One skin_extra slot per shadow chunk, used by set_where/get_where. */
+   needs->sizeof_shadow_block     = 1;
+
+   needs->basic_block_discards    = False;
+   needs->shadow_regs             = False;
+   needs->command_line_options    = True;
+   needs->client_requests         = True;
+   needs->extended_UCode          = False;
+   needs->syscall_wrapper         = True;
+   /* alternative_free: we intercept free() to park blocks on the
+      freed-blocks queue instead of releasing them at once. */
+   needs->alternative_free        = True;
+   needs->sanity_checks           = True;
+
+   /* The helpers called from code generated by SK_(instrument). */
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS2));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_ACCESS1));
+   VG_(register_compact_helper)((Addr) & SK_(fpu_ACCESS_check));
+
+   /* Events to track */
+   track->new_mem_startup       = & addrcheck_new_mem_startup;
+   track->new_mem_heap          = & addrcheck_new_mem_heap;
+   track->new_mem_stack         = & SK_(make_accessible);
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & SK_(make_accessible);
+   track->new_mem_brk           = & SK_(make_accessible);
+   track->new_mem_mmap          = & addrcheck_set_perms;
+   
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->copy_mem_remap        = & copy_address_range_state;
+   track->change_mem_mprotect   = & addrcheck_set_perms;
+      
+   track->ban_mem_heap          = & SK_(make_noaccess);
+   track->ban_mem_stack         = & SK_(make_noaccess);
+
+   track->die_mem_heap          = & SK_(make_noaccess);
+   track->die_mem_stack         = & SK_(make_noaccess);
+   track->die_mem_stack_aligned = & make_noaccess_aligned; 
+   track->die_mem_stack_signal  = & SK_(make_noaccess); 
+   track->die_mem_brk           = & SK_(make_noaccess);
+   track->die_mem_munmap        = & SK_(make_noaccess); 
+
+   track->bad_free              = & SK_(record_free_error);
+   track->mismatched_free       = & SK_(record_freemismatch_error);
+
+   track->pre_mem_read          = & check_is_readable;
+   track->pre_mem_read_asciiz   = & check_is_readable_asciiz;
+   track->pre_mem_write         = & check_is_writable;
+   track->post_mem_write        = & SK_(make_accessible);
+
+   init_shadow_memory();
+
+   init_prof_mem();
+
+   VGP_(register_profile_event) ( VgpSetMem,   "set-mem-perms" );
+   VGP_(register_profile_event) ( VgpCheckMem, "check-mem-perms" );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                           vg_addrcheck.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_addrcheck_include.h b/vg_addrcheck_include.h
new file mode 100644
index 0000000..ef6b147
--- /dev/null
+++ b/vg_addrcheck_include.h
@@ -0,0 +1,120 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A header file for the AddrCheck skin.                        ---*/
+/*---                                       vg_addrcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_ADDRCHECK_INCLUDE_H
+#define __VG_ADDRCHECK_INCLUDE_H
+
+#include "vg_skin.h"
+
+
+/* The classification of a faulting address. */
+typedef 
+   enum { Undescribed, /* as-yet unclassified */
+          Stack, 
+          Unknown, /* classification yielded nothing useful */
+          Freed, Mallocd
+   }
+   AcAddrKind;
+
+/* Records info about a faulting address. */
+typedef
+   struct {
+      /* ALL */
+      AcAddrKind akind;
+      /* Freed, Mallocd */
+      Int blksize;
+      /* Freed, Mallocd */
+      Int rwoffset;
+      /* Freed, Mallocd */
+      ExeContext* lastchange;
+      /* Stack */
+      ThreadId stack_tid;
+      /* True if is just-below %esp -- could be a gcc bug. */
+      Bool maybe_gcc;
+   }
+   AcAddrInfo;
+
+
+/*------------------------------------------------------------*/
+/*--- Skin-specific command line options + defaults        ---*/
+/*------------------------------------------------------------*/
+
+/* Allow loads from partially-valid addresses?  default: YES */
+extern Bool SK_(clo_partial_loads_ok);
+
+/* Max volume of the freed blocks queue. */
+extern Int SK_(clo_freelist_vol);
+
+/* Do leak check at exit?  default: NO */
+extern Bool SK_(clo_leak_check);
+
+/* How closely should we compare ExeContexts in leak records? default: 2 */
+extern VgRes SK_(clo_leak_resolution);
+
+/* In leak check, show reachable-but-not-freed blocks?  default: NO */
+extern Bool SK_(clo_show_reachable);
+
+/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
+ * default: NO*/
+extern Bool SK_(clo_workaround_gcc296_bugs);
+
+
+/*------------------------------------------------------------*/
+/*--- Functions                                            ---*/
+/*------------------------------------------------------------*/
+
+// SSS: work out a consistent prefix convention here
+
+/* Functions defined in vg_addrcheck.c */
+extern void SK_(helperc_ACCESS4) ( Addr );
+extern void SK_(helperc_ACCESS2) ( Addr );
+extern void SK_(helperc_ACCESS1) ( Addr );
+   
+extern void SK_(fpu_ACCESS_check) ( Addr addr, Int size );
+
+extern ShadowChunk* SK_(any_matching_freed_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ) );
+
+/* For client requests */
+extern void SK_(make_noaccess) ( Addr a, UInt len );
+extern void SK_(make_accessible) ( Addr a, UInt len );
+
+extern Bool SK_(check_accessible) ( Addr a, UInt len, Addr* bad_addr );
+
+extern void SK_(detect_memory_leaks) ( void );
+
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   vg_addrcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/vg_annotate.in b/vg_annotate.in
index 1182190..4fd28eb 100644
--- a/vg_annotate.in
+++ b/vg_annotate.in
@@ -26,7 +26,7 @@
 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 #  02111-1307, USA.
 #
-#  The GNU General Public License is contained in the file LICENSE.
+#  The GNU General Public License is contained in the file COPYING.
 
 #----------------------------------------------------------------------------
 # Annotator for cachegrind. 
@@ -134,14 +134,14 @@
 my @include_dirs = ("");
 
 # Input file name
-my $input_file = "cachegrind.out";
+my $input_file = undef;
 
 # Version number
 my $version = "@VERSION@";
 
 # Usage message.
 my $usage = <<END
-usage: vg_annotate [options] [source-files]
+usage: vg_annotate [options] --<pid> [source-files]
 
   options for the user, with defaults in [ ], are:
     -h --help             show this message
@@ -223,12 +223,20 @@
                 $inc =~ s|/$||;         # trim trailing '/'
                 push(@include_dirs, "$inc/");
 
+            } elsif ($arg =~ /^--(\d+)$/) {
+                my $pid = $1;
+                if (not defined $input_file) {
+                    $input_file = "cachegrind.out.$pid";
+                } else {
+                    die("One cachegrind.out.<pid> file at a time, please\n");
+                }
+
             } else {            # -h and --help fall under this case
                 die($usage);
             }
 
         # Argument handling -- annotation file checking and selection.
-        # Stick filenames into a hash for quick 'n easy lookup throughout
+        # Stick filenames into a hash for quick 'n easy lookup throughout.
         } else {
             my $readable = 0;
             foreach my $include_dir (@include_dirs) {
@@ -238,7 +246,12 @@
             }
             $readable or die("File $arg not found in any of: @include_dirs\n");
             $user_ann_files{$arg} = 1;
-        } 
+        }
+    }
+
+    # Must have chosen an input file
+    if (not defined $input_file) {
+        die($usage);
     }
 }
 
diff --git a/vg_cachesim.c b/vg_cachesim.c
index 05f4186..b21815e 100644
--- a/vg_cachesim.c
+++ b/vg_cachesim.c
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
-/*--- The cache simulation framework: instrumentation, recording   ---*/
-/*--- and results printing.                                        ---*/
+/*--- The cache simulation skin: cache detection; instrumentation, ---*/
+/*--- recording and results printing.                              ---*/
 /*---                                                vg_cachesim.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -27,19 +27,32 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
-#include "vg_include.h"
+#include "vg_skin.h"
+//#include "vg_profile.c"
+
+/* For cache simulation */
+typedef struct {
+    int size;       /* bytes */ 
+    int assoc;
+    int line_size;  /* bytes */ 
+} cache_t;
 
 #include "vg_cachesim_L2.c"
 #include "vg_cachesim_I1.c"
 #include "vg_cachesim_D1.c"
 
+/*------------------------------------------------------------*/
+/*--- Constants                                            ---*/
+/*------------------------------------------------------------*/
 
 /* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
 #define MAX_x86_INSTR_SIZE              16
 
+#define MIN_LINE_SIZE   16
+
 /* Size of various buffers used for storing strings */
 #define FILENAME_LEN                    256
 #define FN_NAME_LEN                     256
@@ -48,33 +61,29 @@
 #define RESULTS_BUF_LEN                 128
 #define LINE_BUF_LEN                     64
 
-
 /*------------------------------------------------------------*/
-/*--- Generic utility stuff                                ---*/
+/*--- Profiling events                                     ---*/
 /*------------------------------------------------------------*/
 
-Int VG_(log2) ( Int x ) 
-{
-   Int i;
-   /* Any more than 32 and we overflow anyway... */
-   for (i = 0; i < 32; i++) {
-      if (1 << i == x) return i;
-   }
-   return -1;
-}
-
+typedef 
+   enum { 
+      VgpGetBBCC = VgpFini+1,
+      VgpCacheSimulate,
+      VgpCacheResults
+   } 
+   VgpSkinCC;
 
 /*------------------------------------------------------------*/
 /*--- Output file related stuff                            ---*/
 /*------------------------------------------------------------*/
 
-#define OUT_FILE        "cachegrind.out"
+Char cachegrind_out_file[FILENAME_LEN];
 
 static void file_err()
 {
    VG_(message)(Vg_UserMsg,
                 "error: can't open cache simulation output file `%s'",
-                OUT_FILE );
+                cachegrind_out_file );
    VG_(exit)(1);
 }
 
@@ -95,7 +104,15 @@
     cc->m2 = 0;
 }
 
-typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type;
+typedef 
+   enum {
+      InstrCC,         /* eg. mov %eax,   %ebx                      */
+      ReadCC,          /* eg. mov (%ecx), %esi                      */
+      WriteCC,         /* eg. mov %eax,   (%edx)                    */
+      ModCC,           /* eg. incl (%eax) (read+write one addr)     */
+      ReadWriteCC,     /* eg. call*l (%esi), pushl 0x4(%ebx), movsw 
+                               (read+write two different addrs)      */
+   } CC_type;
 
 /* Instruction-level cost-centres.  The typedefs for these structs are in
  * vg_include.c 
@@ -104,33 +121,53 @@
  *
  * This is because we use it to work out what kind of CC we're dealing with.
  */ 
-struct _iCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   /* 2 bytes padding */
+typedef
+   struct {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      /* 2 bytes padding */
 
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-};
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+   }
+   iCC;
 
-struct _idCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   UChar data_size;
-   /* 1 byte padding */
+typedef
+   struct _idCC {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      UChar data_size;
+      /* 1 byte padding */
 
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-   CC D;
-};
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+      CC D;
+   }
+   idCC;
+
+typedef
+   struct _iddCC {
+      /* word 1 */
+      UChar tag;
+      UChar instr_size;
+      UChar data_size;
+      /* 1 byte padding */
+
+      /* words 2+ */
+      Addr instr_addr;
+      CC I;
+      CC Da;
+      CC Db;
+   }
+   iddCC;
 
 static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
 {
-   cc->tag        = INSTR_CC;
+   cc->tag        = InstrCC;
    cc->instr_size = instr_size;
    cc->instr_addr = instr_addr;
    initCC(&cc->I);
@@ -147,6 +184,18 @@
    initCC(&cc->D);
 }
 
+static void init_iddCC(iddCC* cc, Addr instr_addr,
+                       UInt instr_size, UInt data_size)
+{
+   cc->tag        = ReadWriteCC;
+   cc->instr_size = instr_size;
+   cc->data_size  = data_size;
+   cc->instr_addr = instr_addr;
+   initCC(&cc->I);
+   initCC(&cc->Da);
+   initCC(&cc->Db);
+}
+
 #define ADD_CC_TO(CC_type, cc, total)           \
    total.a  += ((CC_type*)BBCC_ptr)->cc.a;      \
    total.m1 += ((CC_type*)BBCC_ptr)->cc.m1;     \
@@ -193,6 +242,22 @@
 #endif
 }
 
+static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
+{
+#if PRINT_INSTR_ADDRS
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
+                      cc->I.a,  cc->I.m1,  cc->I.m2, 
+                      cc->Da.a, cc->Da.m1, cc->Da.m2,
+                      cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
+#else
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
+                      cc->I.a,  cc->I.m1,  cc->I.m2, 
+                      cc->Da.a, cc->Da.m1, cc->Da.m2,
+                      cc->Db.a, cc->Db.m1, cc->Db.m2);
+#endif
+}
+
+
 /*------------------------------------------------------------*/
 /*--- BBCC hash table stuff                                ---*/
 /*------------------------------------------------------------*/
@@ -257,11 +322,11 @@
 static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
                            Char fn_name[FN_NAME_LEN], Int* line_num)
 {
-   Bool found1, found2, no_demangle = False;
+   Bool found1, found2;
 
-   found1 = VG_(what_line_is_this)(instr_addr, filename,
-                                   FILENAME_LEN, line_num);
-   found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
+   found1 = VG_(get_filename_linenum)(instr_addr, filename,
+                                      FILENAME_LEN, line_num);
+   found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
 
    if (!found1 && !found2) {
       no_debug_BBs++;
@@ -290,8 +355,8 @@
 file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
 {
    Int i;
-   file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node));
-   new->filename  = VG_(strdup)(VG_AR_PRIVATE, filename);
+   file_node* new = VG_(malloc)(sizeof(file_node));
+   new->filename  = VG_(strdup)(filename);
    for (i = 0; i < N_FN_ENTRIES; i++) {
       new->fns[i] = NULL;
    }
@@ -303,8 +368,8 @@
 fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
 {
    Int i;
-   fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node));
-   new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name);
+   fn_node* new = VG_(malloc)(sizeof(fn_node));
+   new->fn_name = VG_(strdup)(fn_name);
    for (i = 0; i < N_BBCC_ENTRIES; i++) {
       new->BBCCs[i] = NULL;
    }
@@ -318,7 +383,7 @@
    Int BBCC_array_size = compute_BBCC_array_size(cb);
    BBCC* new;
 
-   new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size);
+   new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
    new->orig_addr  = bb_orig_addr;
    new->array_size = BBCC_array_size;
    new->next = next;
@@ -352,7 +417,7 @@
 
    get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
 
-   VGP_PUSHCC(VgpCacheGetBBCC);
+   VGP_PUSHCC(VgpGetBBCC);
    filename_hash = hash(filename, N_FILE_ENTRIES);
    curr_file_node = BBCC_table[filename_hash];
    while (NULL != curr_file_node && 
@@ -410,7 +475,7 @@
           BB_retranslations++;
       }
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpGetBBCC);
    return curr_BBCC;
 }
 
@@ -418,11 +483,12 @@
 /*--- Cache simulation instrumentation phase               ---*/
 /*------------------------------------------------------------*/
 
+// SSS: do something about all these...
 #define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
 #define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
 #define newTemp   VG_(getNewTemp)
 
 static Int compute_BBCC_array_size(UCodeBlock* cb)
@@ -430,12 +496,12 @@
    UInstr* u_in;
    Int     i, CC_size, BBCC_size = 0;
    Bool    is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
+   Int     t_read, t_write;
     
    is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
+   t_read = t_write = INVALID_TEMPREG;
 
    for (i = 0; i < cb->used; i++) {
-      /* VG_(ppUInstr)(0, &cb->instrs[i]); */
-
       u_in = &cb->instrs[i];
       switch(u_in->opcode) {
 
@@ -449,8 +515,13 @@
 
             case_for_end_of_instr:
 
-            CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W 
-                      ? sizeof(idCC) : sizeof(iCC));
+            if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) && 
+                 t_read != t_write)
+               CC_size = sizeof(iddCC);
+            else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
+               CC_size = sizeof(idCC);
+            else
+               CC_size = sizeof(iCC);
 
             BBCC_size += CC_size;
             is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
@@ -461,22 +532,26 @@
             /* Also, a STORE can come after a LOAD for bts/btr/btc */
             vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */ 
                       !is_FPU_R && !is_FPU_W);
+            t_read = u_in->val1;
             is_LOAD = True;
             break;
 
          case STORE:
             /* Multiple STOREs are possible for 'pushal' */
             vg_assert(            /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
+            t_write = u_in->val2;
             is_STORE = True;
             break;
 
          case FPU_R:
             vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+            t_read = u_in->val2;
             is_FPU_R = True;
             break;
 
          case FPU_W:
             vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
+            t_write = u_in->val2;
             is_FPU_W = True;
             break;
 
@@ -488,41 +563,153 @@
    return BBCC_size;
 }
 
-/* Use this rather than eg. -1 because it's stored as a UInt. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access(iCC* cc)
+{
+   //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+   cc->I.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+/* Difference between this function and log_1I_0D_cache_access() is that
+   this one can be passed any kind of CC, not just an iCC.  So we have to
+   be careful to make sure we don't make any assumptions about CC layout.
+   (As it stands, they would be safe, but this will avoid potential heartache
+   if anyone else changes CC layout.)  
+   Note that we only do the switch for the JIFZ version because if we always
+   called this switching version, things would run about 5% slower. */
+static __attribute__ ((regparm (1)))
+void log_1I_0D_cache_access_JIFZ(iCC* cc)
+{
+   UChar instr_size;
+   Addr instr_addr;
+   CC* I;
+
+   //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+
+   switch(cc->tag) {
+       case InstrCC:
+           instr_size = cc->instr_size;
+           instr_addr = cc->instr_addr;
+           I = &(cc->I);
+           break;
+       case ReadCC:
+       case WriteCC:
+       case ModCC:
+           instr_size = ((idCC*)cc)->instr_size;
+           instr_addr = ((idCC*)cc)->instr_addr;
+           I = &( ((idCC*)cc)->I );
+           break;
+       case ReadWriteCC:
+           instr_size = ((iddCC*)cc)->instr_size;
+           instr_addr = ((iddCC*)cc)->instr_addr;
+           I = &( ((iddCC*)cc)->I );
+           break;
+       default:
+           VG_(panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
+           break;
+   }
+   cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
+   I->a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static 
+void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
+{
+   //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
+   cc->D.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (2))) static
+void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
+{
+   //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
+   cc->I.a++;
+
+   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
+   cc->D.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static 
+void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+{
+   //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_D1_doref(data_addr1, cc->data_size,  &cc->Da.m1, &cc->Da.m2);
+   cc->Da.a++;
+   cachesim_D1_doref(data_addr2, cc->data_size,  &cc->Db.m1, &cc->Db.m2);
+   cc->Db.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+__attribute__ ((regparm (3))) static
+void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+{
+   //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
+   //            cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
+   VGP_PUSHCC(VgpCacheSimulate);
+   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1,  &cc->I.m2);
+   cc->I.a++;
+
+   cachesim_D1_doref(data_addr1,     cc->data_size,  &cc->Da.m1, &cc->Da.m2);
+   cc->Da.a++;
+   cachesim_D1_doref(data_addr2,     cc->data_size,  &cc->Db.m1, &cc->Db.m2);
+   cc->Db.a++;
+   VGP_POPCC(VgpCacheSimulate);
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+/* Use this rather than eg. -1 because it's a UInt. */
 #define INVALID_DATA_SIZE   999999
 
-UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr)
-{
    UCodeBlock* cb;
    Int         i;
    UInstr*     u_in;
    BBCC*       BBCC_node;
-   Int         t_CC_addr, t_read_addr, t_write_addr, t_data_addr;
+   Int         t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
+               t_data_addr2, t_read, t_write;
    Int         CC_size = -1;    /* Shut gcc warnings up */
-   Addr        instr_addr = orig_addr;
-   UInt        instr_size, data_size = INVALID_DATA_SIZE;
-   Int         helper = -1;     /* Shut gcc warnings up */
+   Addr        x86_instr_addr = orig_addr;
+   UInt        x86_instr_size, data_size = INVALID_DATA_SIZE;
+   Addr        helper;
+   Int         argc;
    UInt        stack_used;
-   Bool        BB_seen_before       = False;
-   Bool        prev_instr_was_Jcond = False;
+   Bool        BB_seen_before     = False;
+   Bool        instrumented_Jcond = False;
+   Bool        has_rep_prefix     = False;
    Addr        BBCC_ptr0, BBCC_ptr; 
 
    /* Get BBCC (creating if necessary -- requires a counting pass over the BB
     * if it's the first time it's been seen), and point to start of the 
     * BBCC array.  */
-   BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before);
+   BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    cb = VG_(allocCodeBlock)();
    cb->nextTemp = cb_in->nextTemp;
 
-   t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG;
+   t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
+               t_read = t_write = INVALID_TEMPREG;
 
    for (i = 0; i < cb_in->used; i++) {
       u_in = &cb_in->instrs[i];
 
-      //VG_(ppUInstr)(0, u_in);
-
       /* What this is all about:  we want to instrument each x86 instruction 
        * translation.  The end of these are marked in three ways.  The three
        * ways, and the way we instrument them, are as follows:
@@ -531,144 +718,33 @@
        * 2. UCode, Juncond        --> UCode, Instrumentation, Juncond
        * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
        *
-       * We must put the instrumentation before the jumps so that it is always
+       * The last UInstr in a basic block is always a Juncond.  Jconds,
+       * when they appear, are always second last.  We check this with 
+       * various assertions.
+       *
+       * We must put the instrumentation before any jumps so that it is always
        * executed.  We don't have to put the instrumentation before the INCEIP
        * (it could go after) but we do so for consistency.
        *
-       * Junconds are always the last instruction in a basic block.  Jconds are
-       * always the 2nd last, and must be followed by a Jcond.  We check this
-       * with various assertions.
+       * x86 instruction sizes are obtained from INCEIPs (for case 1) or
+       * from .extra4b field of the final JMP (for case 2 & 3).
        *
-       * Note that in VG_(disBB) we patched the `extra4b' field of the first
-       * occurring JMP in a block with the size of its x86 instruction.  This
-       * is used now.
-       *
-       * Note that we don't have to treat JIFZ specially;  unlike JMPs, JIFZ
-       * occurs in the middle of a BB and gets an INCEIP after it.
+       * Note that JIFZ is treated differently.
        *
        * The instrumentation is just a call to the appropriate helper function,
        * passing it the address of the instruction's CC.
        */
-      if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP);
+      if (instrumented_Jcond) vg_assert(u_in->opcode == JMP);
 
       switch (u_in->opcode) {
-
-         case INCEIP:
-            instr_size = u_in->val1;
-            goto case_for_end_of_x86_instr;
-
-         case JMP:
-            if (u_in->cond == CondAlways) {
-               vg_assert(i+1 == cb_in->used); 
-
-               /* Don't instrument if previous instr was a Jcond. */
-               if (prev_instr_was_Jcond) {
-                  vg_assert(0 == u_in->extra4b);
-                  VG_(copyUInstr)(cb, u_in);
-                  break;
-               }
-               prev_instr_was_Jcond = False;
-
-            } else {
-               vg_assert(i+2 == cb_in->used);  /* 2nd last instr in block */
-               prev_instr_was_Jcond = True;
-            }
-
-            /* Ah, the first JMP... instrument, please. */
-            instr_size = u_in->extra4b;
-            goto case_for_end_of_x86_instr;
-
-            /* Shared code that is executed at the end of an x86 translation
-             * block, marked by either an INCEIP or an unconditional JMP. */
-            case_for_end_of_x86_instr:
-
-#define IS_(X)      (INVALID_TEMPREG != t_##X##_addr)
-             
-            /* Initialise the CC in the BBCC array appropriately if it hasn't
-             * been initialised before.
-             * Then call appropriate sim function, passing it the CC address.
-             * Note that CALLM_S/CALL_E aren't required here;  by this point,
-             * the checking related to them has already happened. */
-            stack_used = 0;
-
-            vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE);
-            vg_assert(0 != instr_addr);
-
-            if (!IS_(read) && !IS_(write)) {
-               iCC* CC_ptr = (iCC*)(BBCC_ptr);
-               vg_assert(INVALID_DATA_SIZE == data_size);
-               vg_assert(INVALID_TEMPREG == t_read_addr && 
-                         INVALID_TEMPREG == t_write_addr);
-               CC_size = sizeof(iCC);
-               if (!BB_seen_before)
-                   init_iCC(CC_ptr, instr_addr, instr_size);
-
-               /* 1st arg: CC addr */
-               t_CC_addr = newTemp(cb);
-               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
-               uLiteral(cb, BBCC_ptr);
-
-               uInstr1(cb, CCALL_1_0, 0, TempReg, t_CC_addr);
-               uLiteral(cb, VGOFF_(cachesim_log_non_mem_instr));
-
-            } else { 
-               CC_type X_CC;
-               idCC* CC_ptr = (idCC*)(BBCC_ptr);
-                
-               vg_assert(4 == data_size || 2  == data_size || 1 == data_size || 
-                         8 == data_size || 10 == data_size);
-               
-               CC_size = sizeof(idCC);
-               helper = VGOFF_(cachesim_log_mem_instr);
-
-               if (IS_(read) && !IS_(write)) {
-                  X_CC = READ_CC;
-                  vg_assert(INVALID_TEMPREG != t_read_addr && 
-                            INVALID_TEMPREG == t_write_addr);
-                  t_data_addr = t_read_addr;
-
-               } else if (!IS_(read) && IS_(write)) {
-                  X_CC = WRITE_CC;
-                  vg_assert(INVALID_TEMPREG == t_read_addr && 
-                            INVALID_TEMPREG != t_write_addr);
-                  t_data_addr = t_write_addr;
-
-               } else {
-                  vg_assert(IS_(read) && IS_(write));
-                  X_CC = MOD_CC;
-                  vg_assert(INVALID_TEMPREG != t_read_addr && 
-                            INVALID_TEMPREG != t_write_addr);
-                  t_data_addr = t_read_addr;
-               }
-#undef IS_
-               if (!BB_seen_before)
-                  init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size);
-
-               /* 1st arg: CC addr */
-               t_CC_addr = newTemp(cb);
-               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
-               uLiteral(cb, BBCC_ptr);
-
-               uInstr2(cb, CCALL_2_0, 0, TempReg, t_CC_addr, 
-                                         TempReg, t_data_addr);
-               uLiteral(cb, VGOFF_(cachesim_log_mem_instr));
-            }
-
-            VG_(copyUInstr)(cb, u_in);
-
-            /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
-            BBCC_ptr   += CC_size; 
-            instr_addr += instr_size;
-            t_CC_addr = t_read_addr = t_write_addr = 
-                                      t_data_addr  = INVALID_TEMPREG;
-            data_size = INVALID_DATA_SIZE;
+         case NOP:  case CALLM_E:  case CALLM_S:
             break;
 
-
          /* For memory-ref instrs, copy the data_addr into a temporary to be
-          * passed to the cachesim_log_function at the end of the instruction.
+          * passed to the cachesim_* helper at the end of the instruction.
           */
          case LOAD: 
+            t_read      = u_in->val1;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val1,  TempReg, t_read_addr);
             data_size = u_in->size;
@@ -676,26 +752,216 @@
             break;
 
          case FPU_R:
+            t_read      = u_in->val2;
             t_read_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val2,  TempReg, t_read_addr);
-            data_size = u_in->size;
+            data_size = ( u_in->size <= MIN_LINE_SIZE
+                        ? u_in->size
+                        : MIN_LINE_SIZE);
             VG_(copyUInstr)(cb, u_in);
             break;
 
          /* Note that we must set t_write_addr even for mod instructions;
-          * that's how the code above determines whether it does a write;
-          * without it, it would think a mod instruction is a read.
+          * That's how the code above determines whether it does a write.
+          * Without it, it would think a mod instruction is a read.
           * As for the MOV, if it's a mod instruction it's redundant, but it's
           * not expensive and mod instructions are rare anyway. */
          case STORE:
          case FPU_W:
+            t_write      = u_in->val2;
             t_write_addr = newTemp(cb);
             uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
-            data_size = u_in->size;
+            /* 28 and 108 B data-sized instructions will be done
+             * inaccurately but they're very rare and this avoids errors
+             * from hitting more than two cache lines in the simulation. */
+            data_size = ( u_in->size <= MIN_LINE_SIZE
+                        ? u_in->size
+                        : MIN_LINE_SIZE);
             VG_(copyUInstr)(cb, u_in);
             break;
 
-         case NOP:  case CALLM_E:  case CALLM_S:
+
+         /* For rep-prefixed instructions, log a single I-cache access
+          * before the UCode loop that implements the repeated part, which
+          * is where the multiple D-cache accesses are logged. */
+         case JIFZ:
+            has_rep_prefix = True;
+
+            /* Setup 1st and only arg: CC addr */
+            t_CC_addr = newTemp(cb);
+            uInstr2(cb, MOV,  4, Literal, 0, TempReg, t_CC_addr);
+            uLiteral(cb, BBCC_ptr);
+
+            /* Call helper */
+            uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+            uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+
+         /* INCEIP: insert instrumentation */
+         case INCEIP:
+            x86_instr_size = u_in->val1;
+            goto instrument_x86_instr;
+
+         /* JMP: insert instrumentation if the first JMP */
+         case JMP:
+            if (instrumented_Jcond) {
+               vg_assert(CondAlways == u_in->cond);
+               vg_assert(i+1 == cb_in->used);
+               VG_(copyUInstr)(cb, u_in);
+               instrumented_Jcond = False;    /* reset */
+               break;
+            }
+            /* The first JMP... instrument. */
+            if (CondAlways != u_in->cond) {
+               vg_assert(i+2 == cb_in->used);
+               instrumented_Jcond = True;
+            } else {
+               vg_assert(i+1 == cb_in->used);
+            }
+
+            /* Get x86 instr size from final JMP. */
+            x86_instr_size = LAST_UINSTR(cb_in).extra4b;
+            goto instrument_x86_instr;
+
+
+            /* Code executed at the end of each x86 instruction. */
+            instrument_x86_instr:
+             
+            /* Initialise the CC in the BBCC array appropriately if it
+             * hasn't been initialised before.  Then call appropriate sim
+             * function, passing it the CC address. */
+            stack_used = 0;
+
+            vg_assert(x86_instr_size >= 1 && 
+                      x86_instr_size <= MAX_x86_INSTR_SIZE);
+
+#define IS_(X)      (INVALID_TEMPREG != t_##X##_addr)
+
+            if (!IS_(read) && !IS_(write)) {
+               vg_assert(INVALID_DATA_SIZE == data_size);
+               vg_assert(INVALID_TEMPREG == t_read_addr  && 
+                         INVALID_TEMPREG == t_read       && 
+                         INVALID_TEMPREG == t_write_addr &&
+                         INVALID_TEMPREG == t_write);
+               CC_size = sizeof(iCC);
+               if (!BB_seen_before)
+                   init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
+               helper = ( has_rep_prefix 
+                        ? (Addr)0      /* no extra log needed */
+                        : (Addr) & log_1I_0D_cache_access
+                        );
+               argc = 1;
+
+            } else { 
+               vg_assert(4 == data_size || 2  == data_size || 1 == data_size || 
+                         8 == data_size || 10 == data_size ||
+                         MIN_LINE_SIZE == data_size);
+               
+               if (IS_(read) && !IS_(write)) {
+                  CC_size = sizeof(idCC);
+                  /* If it uses 'rep', we've already logged the I-cache 
+                   * access at the JIFZ UInstr (see JIFZ case below) so
+                   * don't do it here */
+                  helper = ( has_rep_prefix 
+                           ? (Addr) & log_0I_1D_cache_access
+                           : (Addr) & log_1I_1D_cache_access
+                           );
+                  argc = 2;
+                  if (!BB_seen_before)
+                     init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                               x86_instr_size, data_size);
+                  vg_assert(INVALID_TEMPREG != t_read_addr  && 
+                            INVALID_TEMPREG != t_read       && 
+                            INVALID_TEMPREG == t_write_addr &&
+                            INVALID_TEMPREG == t_write);
+                  t_data_addr1 = t_read_addr;
+
+               } else if (!IS_(read) && IS_(write)) {
+                  CC_size = sizeof(idCC);
+                  helper = ( has_rep_prefix 
+                           ? (Addr) & log_0I_1D_cache_access
+                           : (Addr) & log_1I_1D_cache_access
+                           );
+                  argc = 2;
+                  if (!BB_seen_before)
+                     init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                               x86_instr_size, data_size);
+                  vg_assert(INVALID_TEMPREG == t_read_addr  && 
+                            INVALID_TEMPREG == t_read       && 
+                            INVALID_TEMPREG != t_write_addr &&
+                            INVALID_TEMPREG != t_write);
+                  t_data_addr1 = t_write_addr;
+
+               } else {
+                  vg_assert(IS_(read) && IS_(write));
+                  vg_assert(INVALID_TEMPREG != t_read_addr  && 
+                            INVALID_TEMPREG != t_read       && 
+                            INVALID_TEMPREG != t_write_addr &&
+                            INVALID_TEMPREG != t_write);
+                  if (t_read == t_write) {
+                     CC_size = sizeof(idCC);
+                     helper = ( has_rep_prefix 
+                              ? (Addr) & log_0I_1D_cache_access
+                              : (Addr) & log_1I_1D_cache_access
+                              );
+                     argc = 2;
+                     if (!BB_seen_before)
+                        init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
+                                  x86_instr_size, data_size);
+                     t_data_addr1 = t_read_addr;
+                  } else {
+                     CC_size = sizeof(iddCC);
+                     helper = ( has_rep_prefix 
+                              ? (Addr) & log_0I_2D_cache_access
+                              : (Addr) & log_1I_2D_cache_access
+                              );
+                     argc = 3;
+                     if (!BB_seen_before)
+                        init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
+                                    x86_instr_size, data_size);
+                     t_data_addr1 = t_read_addr;
+                     t_data_addr2 = t_write_addr;
+                  }
+               }
+#undef IS_
+            }
+
+            /* Call the helper, if necessary */
+            if ((Addr)0 != helper) {
+
+               /* Setup 1st arg: CC addr */
+               t_CC_addr = newTemp(cb);
+               uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_CC_addr);
+               uLiteral(cb, BBCC_ptr);
+
+               /* Call the helper */
+               if      (1 == argc)
+                  uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+               else if (2 == argc)
+                  uInstr2(cb, CCALL, 0, TempReg, t_CC_addr, 
+                                        TempReg, t_data_addr1);
+               else if (3 == argc)
+                  uInstr3(cb, CCALL, 0, TempReg, t_CC_addr, 
+                                        TempReg, t_data_addr1,
+                                        TempReg, t_data_addr2);
+               else
+                  VG_(panic)("argc... not 1 or 2 or 3?");
+               
+               uCCall(cb, helper, argc, argc, False);
+            }
+
+            /* Copy original UInstr (INCEIP or JMP) */
+            VG_(copyUInstr)(cb, u_in);
+
+            /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
+            BBCC_ptr       += CC_size; 
+            x86_instr_addr += x86_instr_size;
+            t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = 
+                        t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
+            data_size = INVALID_DATA_SIZE;
+            has_rep_prefix = False; 
             break;
 
          default:
@@ -709,19 +975,25 @@
 
    VG_(freeCodeBlock)(cb_in);
    return cb;
+
+#undef INVALID_DATA_SIZE
 }
 
 /*------------------------------------------------------------*/
-/*--- Cache simulation stuff                               ---*/
+/*--- Automagic cache initialisation stuff                 ---*/
 /*------------------------------------------------------------*/
 
-#define MIN_LINE_SIZE   16
-
 /* Total reads/writes/misses.  Calculated during CC traversal at the end. */
 static CC Ir_total;
 static CC Dr_total;
 static CC Dw_total;
 
+#define UNDEFINED_CACHE     ((cache_t) { -1, -1, -1 }) 
+
+static cache_t clo_I1_cache = UNDEFINED_CACHE;
+static cache_t clo_D1_cache = UNDEFINED_CACHE;
+static cache_t clo_L2_cache = UNDEFINED_CACHE;
+
 /* All CPUID info taken from sandpile.org/a32/cpuid.htm */
 /* Probably only works for Intel and AMD chips, and probably only for some of
  * them. 
@@ -739,7 +1011,7 @@
 static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
 {
     VG_(message)(Vg_DebugMsg, 
-       "warning: Pentium with %d K micro_op instruction trace cache", 
+       "warning: Pentium with %d K micro-op instruction trace cache", 
        actual_size);
     VG_(message)(Vg_DebugMsg, 
        "         Simulating a %d KB cache with %d B lines", 
@@ -755,6 +1027,7 @@
 {
    UChar info[16];
    Int   i, trials;
+   Bool  L2_found = False;
 
    if (level < 2) {
       VG_(message)(Vg_DebugMsg, 
@@ -782,8 +1055,9 @@
       case 0x0:       /* ignore zeros */
           break;
           
-      case 0x01: case 0x02: case 0x03: case 0x04:     /* TLB info, ignore */
-      case 0x90: case 0x96: case 0x9b:
+      /* TLB info, ignore */
+      case 0x01: case 0x02: case 0x03: case 0x04:
+      case 0x50: case 0x51: case 0x52: case 0x5b: case 0x5c: case 0x5d:
           break;      
 
       case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
@@ -792,22 +1066,35 @@
       case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
       case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
 
+      /* IA-64 info -- panic! */
+      case 0x10: case 0x15: case 0x1a: 
+      case 0x88: case 0x89: case 0x8a: case 0x8d:
+      case 0x90: case 0x96: case 0x9b:
+         VG_(message)(Vg_DebugMsg,
+            "error: IA-64 cache stats!  Cachegrind doesn't run on IA-64...");
+         VG_(panic)("IA-64 detected");
+
       case 0x22: case 0x23: case 0x25: case 0x29: 
-      case 0x88: case 0x89: case 0x8a:
           VG_(message)(Vg_DebugMsg, 
              "warning: L3 cache detected but ignored\n");
           break;
 
-      case 0x40: 
-          VG_(message)(Vg_DebugMsg, 
-             "warning: L2 cache not installed, ignore L2 results.");
+      /* These are sectored, whatever that means */
+      case 0x39: *L2c = (cache_t) {  128, 4, 64 }; L2_found = True; break;
+      case 0x3c: *L2c = (cache_t) {  256, 4, 64 }; L2_found = True; break;
+
+      /* If a P6 core, this means "no L2 cache".  
+         If a P4 core, this means "no L3 cache".
+         We don't know what core it is, so don't issue a warning.  To detect
+         a missing L2 cache, we use 'L2_found'. */
+      case 0x40:
           break;
 
-      case 0x41: *L2c = (cache_t) {  128, 4, 32 };    break;
-      case 0x42: *L2c = (cache_t) {  256, 4, 32 };    break;
-      case 0x43: *L2c = (cache_t) {  512, 4, 32 };    break;
-      case 0x44: *L2c = (cache_t) { 1024, 4, 32 };    break;
-      case 0x45: *L2c = (cache_t) { 2048, 4, 32 };    break;
+      case 0x41: *L2c = (cache_t) {  128, 4, 32 }; L2_found = True; break;
+      case 0x42: *L2c = (cache_t) {  256, 4, 32 }; L2_found = True; break;
+      case 0x43: *L2c = (cache_t) {  512, 4, 32 }; L2_found = True; break;
+      case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
+      case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
 
       /* These are sectored, whatever that means */
       case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
@@ -832,24 +1119,31 @@
          micro_ops_warn(32, 32, 32); 
          break;  
 
-      case 0x79: *L2c = (cache_t) {  128, 8, 64 };    break;  /* sectored */
-      case 0x7a: *L2c = (cache_t) {  256, 8, 64 };    break;  /* sectored */
-      case 0x7b: *L2c = (cache_t) {  512, 8, 64 };    break;  /* sectored */
-      case 0x7c: *L2c = (cache_t) { 1024, 8, 64 };    break;  /* sectored */
+      /* These are sectored, whatever that means */
+      case 0x79: *L2c = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
+      case 0x7a: *L2c = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
+      case 0x7b: *L2c = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
+      case 0x7c: *L2c = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
+      case 0x7e: *L2c = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
 
-      case 0x81: *L2c = (cache_t) {  128, 8, 32 };    break;
-      case 0x82: *L2c = (cache_t) {  256, 8, 32 };    break;
-      case 0x83: *L2c = (cache_t) {  512, 8, 32 };    break;
-      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };    break;
-      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };    break;
+      case 0x81: *L2c = (cache_t) {  128, 8, 32 };  L2_found = True;  break;
+      case 0x82: *L2c = (cache_t) {  256, 8, 32 };  L2_found = True;  break;
+      case 0x83: *L2c = (cache_t) {  512, 8, 32 };  L2_found = True;  break;
+      case 0x84: *L2c = (cache_t) { 1024, 8, 32 };  L2_found = True;  break;
+      case 0x85: *L2c = (cache_t) { 2048, 8, 32 };  L2_found = True;  break;
 
       default:
           VG_(message)(Vg_DebugMsg, 
              "warning: Unknown Intel cache config value "
-             "(0x%x), ignoring\n", info[i]);
+             "(0x%x), ignoring", info[i]);
           break;
       }
    }
+
+   if (!L2_found)
+      VG_(message)(Vg_DebugMsg, 
+         "warning: L2 cache not installed, ignore L2 results.");
+
    return 0;
 }
 
@@ -871,12 +1165,16 @@
  * #3  The AMD K7 processor's L2 cache must be configured prior to relying 
  *     upon this information. (Whatever that means -- njn)
  *
+ * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
+ * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
+ * so we detect that.
+ * 
  * Returns 0 on success, non-zero on failure.
  */
 static
 Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
 {
-   Int dummy, ext_level;
+   Int dummy, model, ext_level;
    Int I1i, D1i, L2i;
    
    cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy);
@@ -891,6 +1189,16 @@
    cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i);
    cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy);
 
+   cpuid(0x1, &model, &dummy, &dummy, &dummy);
+   /*VG_(message)(Vg_UserMsg,"CPU model %04x",model);*/
+
+   /* Check for Duron bug */
+   if (model == 0x630) {
+      VG_(message)(Vg_UserMsg,
+         "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
+      L2i = (64 << 16) | (L2i & 0xffff);
+   }
+
    D1c->size      = (D1i >> 24) & 0xff;
    D1c->assoc     = (D1i >> 16) & 0xff;
    D1c->line_size = (D1i >>  0) & 0xff;
@@ -1044,14 +1352,14 @@
    cache_t D1_dflt = (cache_t) {  65536, 2, 64 };
    cache_t L2_dflt = (cache_t) { 262144, 8, 64 };
 
-#define CMD_LINE_DEFINED(L)                 \
-   (-1 != VG_(clo_##L##_cache).size  ||     \
-    -1 != VG_(clo_##L##_cache).assoc ||     \
-    -1 != VG_(clo_##L##_cache).line_size)
+#define CMD_LINE_DEFINED(L)            \
+   (-1 != clo_##L##_cache.size  ||     \
+    -1 != clo_##L##_cache.assoc ||     \
+    -1 != clo_##L##_cache.line_size)
 
-   *I1c = VG_(clo_I1_cache);
-   *D1c = VG_(clo_D1_cache);
-   *L2c = VG_(clo_L2_cache);
+   *I1c = clo_I1_cache;
+   *D1c = clo_D1_cache;
+   *L2c = clo_L2_cache;
 
    /* If any undefined on command-line, try CPUID */
    if (! CMD_LINE_DEFINED(I1) ||
@@ -1061,9 +1369,9 @@
       /* Overwrite CPUID result for any cache defined on command-line */
       if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) {
    
-         if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache);
-         if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache);
-         if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache);
+         if (CMD_LINE_DEFINED(I1)) *I1c = clo_I1_cache;
+         if (CMD_LINE_DEFINED(D1)) *D1c = clo_D1_cache;
+         if (CMD_LINE_DEFINED(L2)) *L2c = clo_L2_cache;
 
       /* CPUID failed, use defaults for each undefined by command-line */
       } else {
@@ -1071,9 +1379,9 @@
                       "Couldn't detect cache configuration, using one "
                       "or more defaults ");
 
-         *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt);
-         *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt);
-         *L2c = (CMD_LINE_DEFINED(L2) ? VG_(clo_L2_cache) : L2_dflt);
+         *I1c = (CMD_LINE_DEFINED(I1) ? clo_I1_cache : I1_dflt);
+         *D1c = (CMD_LINE_DEFINED(D1) ? clo_D1_cache : D1_dflt);
+         *L2c = (CMD_LINE_DEFINED(L2) ? clo_L2_cache : L2_dflt);
       }
    }
 #undef CMD_LINE_DEFINED
@@ -1093,65 +1401,8 @@
    }
 }
 
-void VG_(init_cachesim)(void)
-{
-   cache_t I1c, D1c, L2c; 
-
-   /* Make sure the output file can be written. */
-   Int fd = VG_(open_write)(OUT_FILE);
-   if (-1 == fd) { 
-      fd = VG_(create_and_write)(OUT_FILE);
-      if (-1 == fd) {
-         file_err(); 
-      }
-   }
-   VG_(close)(fd);
-
-   initCC(&Ir_total);
-   initCC(&Dr_total);
-   initCC(&Dw_total);
-   
-   initCC(&Ir_discards);
-   initCC(&Dr_discards);
-   initCC(&Dw_discards);
-
-   get_caches(&I1c, &D1c, &L2c);
-
-   cachesim_I1_initcache(I1c);
-   //cachesim_I1_initcache();
-   cachesim_D1_initcache(D1c);
-   //cachesim_D1_initcache();
-   cachesim_L2_initcache(L2c);
-   //cachesim_L2_initcache();
-
-   init_BBCC_table();
-}
-
-void VG_(cachesim_log_non_mem_instr)(iCC* cc)
-{
-   //VG_(printf)("sim  I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
-   //            cc, cc->instr_addr, cc->instr_size)
-   VGP_PUSHCC(VgpCacheSimulate);
-   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
-   cc->I.a++;
-   VGP_POPCC;
-}
-
-void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr)
-{
-   //VG_(printf)("sim  D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n",
-   //            cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
-   VGP_PUSHCC(VgpCacheSimulate);
-   cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
-   cc->I.a++;
-
-   cachesim_D1_doref(data_addr,      cc->data_size,  &cc->D.m1, &cc->D.m2);
-   cc->D.a++;
-   VGP_POPCC;
-}
-
 /*------------------------------------------------------------*/
-/*--- Printing of output file and summary stats            ---*/
+/*--- SK_(fini)() and related function                     ---*/
 /*------------------------------------------------------------*/
 
 static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl, 
@@ -1181,15 +1432,15 @@
       Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-         case INSTR_CC:
+         case InstrCC:
             instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
             sprint_iCC(buf, (iCC*)BBCC_ptr);
             ADD_CC_TO(iCC, I, Ir_total);
             BBCC_ptr += sizeof(iCC);
             break;
 
-         case READ_CC:
-         case  MOD_CC:
+         case ReadCC:
+         case  ModCC:
             instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
             sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
@@ -1197,7 +1448,7 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
-         case WRITE_CC:
+         case WriteCC:
             instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
             sprint_write_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
@@ -1205,6 +1456,15 @@
             BBCC_ptr += sizeof(idCC);
             break;
 
+         case ReadWriteCC:
+            instr_addr = ((iddCC*)BBCC_ptr)->instr_addr;
+            sprint_read_write_CC(buf, (iddCC*)BBCC_ptr);
+            ADD_CC_TO(iddCC, I,  Ir_total);
+            ADD_CC_TO(iddCC, Da, Dr_total);
+            ADD_CC_TO(iddCC, Db, Dw_total);
+            BBCC_ptr += sizeof(iddCC);
+            break;
+
          default:
             VG_(panic)("Unknown CC type in fprint_BBCC()\n");
             break;
@@ -1223,7 +1483,7 @@
 
       /* If the function name for this instruction doesn't match that of the
        * first instruction in the BB, print warning. */
-      if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
+      if (VG_(clo_verbosity > 2) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
          VG_(printf)("  filenames: BB:%s, instr:%s;"
                      "  fn_names:  BB:%s, instr:%s;"
@@ -1251,8 +1511,7 @@
    vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
 }
 
-static void fprint_BBCC_table_and_calc_totals(Int client_argc, 
-                                              Char** client_argv)
+static void fprint_BBCC_table_and_calc_totals(void)
 {
    Int        fd;
    Char       buf[BUF_LEN];
@@ -1261,8 +1520,8 @@
    BBCC      *curr_BBCC;
    Int        i,j,k;
 
-   VGP_PUSHCC(VgpCacheDump);
-   fd = VG_(open_write)(OUT_FILE);
+   VGP_PUSHCC(VgpCacheResults);
+   fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
    if (-1 == fd) { file_err(); }
 
    /* "desc:" lines (giving I1/D1/L2 cache configuration) */
@@ -1276,8 +1535,8 @@
    /* "cmd:" line */
    VG_(strcpy)(buf, "cmd:");
    VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
-   for (i = 0; i < client_argc; i++) {
-       VG_(sprintf)(buf, " %s", client_argv[i]);
+   for (i = 0; i < VG_(client_argc); i++) {
+       VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
        VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
    }
    /* "events:" line */
@@ -1395,6 +1654,7 @@
    VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow);
    len = VG_(strlen)(buf);
    space = field_width - len;
+   if (space < 0) space = 0;     /* Allow for v. small field_width */
    i = len;
 
    /* Right justify in field */
@@ -1402,7 +1662,7 @@
    for (i = 0; i < space; i++)  buf[i] = ' ';
 }
 
-void VG_(do_cachesim_results)(Int client_argc, Char** client_argv)
+void SK_(fini)(void)
 {
    CC D_total;
    ULong L2_total_m, L2_total_mr, L2_total_mw,
@@ -1413,7 +1673,7 @@
    Int l1, l2, l3;
    Int p;
 
-   fprint_BBCC_table_and_calc_totals(client_argc, client_argv);
+   fprint_BBCC_table_and_calc_totals();
 
    if (VG_(clo_verbosity) == 0) 
       return;
@@ -1431,6 +1691,7 @@
 
    p = 100;
 
+   if (0 == Ir_total.a) Ir_total.a = 1;
    percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1);
    VG_(message)(Vg_UserMsg, "I1  miss rate: %s", buf1);
                 
@@ -1464,6 +1725,9 @@
 
    p = 10;
    
+   if (0 == D_total.a)   D_total.a = 1;
+   if (0 == Dr_total.a) Dr_total.a = 1;
+   if (0 == Dw_total.a) Dw_total.a = 1;
    percentify( D_total.m1 * 100 * p / D_total.a,  p, l1+1, buf1);
    percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2);
    percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3);
@@ -1525,7 +1789,7 @@
        VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations);
        VG_(message)(Vg_DebugMsg, "Distinct instrs:  %d", distinct_instrs);
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpCacheResults);
 }
 
 
@@ -1534,19 +1798,18 @@
  *
  * Finds the BBCC in the table, removes it, adds the counts to the discard
  * counters, and then frees the BBCC. */
-void VG_(cachesim_notify_discard) ( TTEntry* tte )
+void SK_(discard_basic_block_info) ( Addr a, UInt size )
 {
    BBCC *BBCC_node;
    Addr BBCC_ptr0, BBCC_ptr;
    Bool BB_seen_before;
     
    if (0)
-   VG_(printf)( "cachesim_notify_discard: %p for %d\n", 
-                tte->orig_addr, (Int)tte->orig_size);
+      VG_(printf)( "discard_basic_block_info: addr %p, size %u\n", a, size);
 
    /* 2nd arg won't be used since BB should have been seen before (assertions
     * ensure this). */
-   BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before);
+   BBCC_node = get_BBCC(a, NULL, /*remove=*/True, &BB_seen_before);
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
    vg_assert(True == BB_seen_before);
@@ -1559,33 +1822,182 @@
 
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
-         case INSTR_CC:
+         case InstrCC:
             ADD_CC_TO(iCC, I, Ir_discards);
             BBCC_ptr += sizeof(iCC);
             break;
 
-         case READ_CC:
-         case  MOD_CC:
+         case ReadCC:
+         case  ModCC:
             ADD_CC_TO(idCC, I, Ir_discards);
             ADD_CC_TO(idCC, D, Dr_discards);
             BBCC_ptr += sizeof(idCC);
             break;
 
-         case WRITE_CC:
+         case WriteCC:
             ADD_CC_TO(idCC, I, Ir_discards);
             ADD_CC_TO(idCC, D, Dw_discards);
             BBCC_ptr += sizeof(idCC);
             break;
 
+         case ReadWriteCC:
+            ADD_CC_TO(iddCC, I, Ir_discards);
+            ADD_CC_TO(iddCC, Da, Dr_discards);
+            ADD_CC_TO(iddCC, Db, Dw_discards);
+            BBCC_ptr += sizeof(iddCC);
+            break;
+
          default:
-            VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n");
+            VG_(panic)("Unknown CC type in VG_(discard_basic_block_info)()\n");
             break;
       }
    }
-
-   VG_(free)(VG_AR_PRIVATE, BBCC_node);
+   VG_(free)(BBCC_node);
 }
 
 /*--------------------------------------------------------------------*/
+/*--- Command line processing                                      ---*/
+/*--------------------------------------------------------------------*/
+
+static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+{
+   int   i1, i2, i3;
+   int   i;
+   char *opt = VG_(strdup)(orig_opt);
+
+   i = i1 = opt_len;
+
+   /* Option looks like "--I1=65536,2,64".
+    * Find commas, replace with NULs to make three independent 
+    * strings, then extract numbers.  Yuck. */
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i2 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if (',' == opt[i]) {
+      opt[i++] = '\0';
+      i3 = i;
+   } else goto bad;
+   while (VG_(isdigit)(opt[i])) i++;
+   if ('\0' != opt[i]) goto bad;
+
+   cache->size      = (Int)VG_(atoll)(opt + i1);
+   cache->assoc     = (Int)VG_(atoll)(opt + i2);
+   cache->line_size = (Int)VG_(atoll)(opt + i3);
+
+   VG_(free)(opt);
+
+   return;
+
+  bad:
+   VG_(bad_option)(orig_opt);
+}
+
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+   /* 5 is length of "--I1=" */
+   if      (0 == VG_(strncmp)(arg, "--I1=", 5))
+      parse_cache_opt(&clo_I1_cache, arg,   5);
+   else if (0 == VG_(strncmp)(arg, "--D1=", 5))
+      parse_cache_opt(&clo_D1_cache, arg,   5);
+   else if (0 == VG_(strncmp)(arg, "--L2=", 5))
+      parse_cache_opt(&clo_L2_cache, arg,   5);
+   else
+      return False;
+
+   return True;
+}
+
+Char* SK_(usage)(void)
+{
+   return 
+"    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
+"    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
+"    --L2=<size>,<assoc>,<line_size>  set L2 cache manually\n";
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used) 
+{
+   needs->name                    = "cachegrind";
+   needs->description             = "an I1/D1/L2 cache profiler";
+
+   needs->basic_block_discards    = True;
+   needs->command_line_options    = True;
+
+   VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_0D_cache_access_JIFZ);
+   VG_(register_compact_helper)((Addr) & log_0I_1D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_1D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_0I_2D_cache_access);
+   VG_(register_compact_helper)((Addr) & log_1I_2D_cache_access);
+}
+
+void SK_(post_clo_init)(void)
+{
+   cache_t I1c, D1c, L2c; 
+   Int fd;
+
+   /* Set output file name: cachegrind.<pid>.out */
+   VG_(sprintf)(cachegrind_out_file, "cachegrind.out.%d", VG_(getpid)());
+
+   /* Make sure the output file can be written. */
+   fd = VG_(open)(cachegrind_out_file, VKI_O_WRONLY|VKI_O_TRUNC, 0);
+   if (-1 == fd) { 
+      fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_WRONLY,
+                                          VKI_S_IRUSR|VKI_S_IWUSR);
+      if (-1 == fd) {
+         file_err(); 
+      }
+   }
+   VG_(close)(fd);
+
+   initCC(&Ir_total);
+   initCC(&Dr_total);
+   initCC(&Dw_total);
+   
+   initCC(&Ir_discards);
+   initCC(&Dr_discards);
+   initCC(&Dw_discards);
+
+   get_caches(&I1c, &D1c, &L2c);
+
+   cachesim_I1_initcache(I1c);
+   cachesim_D1_initcache(D1c);
+   cachesim_L2_initcache(L2c);
+
+   VGP_(register_profile_event)(VgpGetBBCC,       "get-BBCC");
+   VGP_(register_profile_event)(VgpCacheSimulate, "cache-simulate");
+   VGP_(register_profile_event)(VgpCacheResults,  "cache-results");
+   
+   init_BBCC_table();
+}
+
+#if 0
+Bool SK_(cheap_sanity_check)(void) { return True; }
+
+extern TTEntry* vg_tt;
+
+Bool SK_(expensive_sanity_check)(void)
+{ 
+   Int i;
+   Bool dummy;
+   for (i = 0; i < 200191; i++) {
+      if (vg_tt[i].orig_addr != (Addr)1 &&
+          vg_tt[i].orig_addr != (Addr)3) {
+         VG_(printf)(".");
+         get_BBCC(vg_tt[i].orig_addr, NULL, /*remove=*/True, &dummy);
+      }
+   }
+   return True;
+}
+#endif
+
+/*--------------------------------------------------------------------*/
 /*--- end                                            vg_cachesim.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/vg_cachesim_D1.c b/vg_cachesim_D1.c
index 7b8a8da..19d11ba 100644
--- a/vg_cachesim_D1.c
+++ b/vg_cachesim_D1.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/vg_cachesim_I1.c b/vg_cachesim_I1.c
index 26db3b3..8993ecb 100644
--- a/vg_cachesim_I1.c
+++ b/vg_cachesim_I1.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/vg_cachesim_L2.c b/vg_cachesim_L2.c
index ec89027..e870db2 100644
--- a/vg_cachesim_L2.c
+++ b/vg_cachesim_L2.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_cachesim_gen.c"
diff --git a/vg_cachesim_gen.c b/vg_cachesim_gen.c
index 182a031..89d3337 100644
--- a/vg_cachesim_gen.c
+++ b/vg_cachesim_gen.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* Notes:
@@ -78,8 +78,7 @@
                                  c->size, c->line_size, c->assoc);
    }
 
-   c->tags = VG_(malloc)(VG_AR_PRIVATE, 
-                         sizeof(UInt) * c->sets * c->assoc);
+   c->tags = VG_(malloc)(sizeof(UInt) * c->sets * c->assoc);
 
    for (i = 0; i < c->sets * c->assoc; i++)
       c->tags[i] = 0;
@@ -100,9 +99,9 @@
 }
 #endif 
 
-/* XXX: This is done as a macro rather than by passing in the cache_t2 as
- * an arg because it slows things down by a small amount (3-5%) due to all that
- * extra indirection. */
+/* This is done as a macro rather than by passing in the cache_t2 as an 
+ * arg because it slows things down by a small amount (3-5%) due to all 
+ * that extra indirection. */
 
 #define CACHESIM(L, MISS_TREATMENT)                                         \
 /* The cache and associated bits and pieces. */                             \
diff --git a/vg_clientfuncs.c b/vg_clientfuncs.c
index c71b6db..b37059b 100644
--- a/vg_clientfuncs.c
+++ b/vg_clientfuncs.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 #include "valgrind.h"   /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -72,7 +71,7 @@
    the real one, this is because the dynamic linker is running the
    static initialisers for C++, before starting up Valgrind itself.
    In this case it is safe to route calls through to
-   VG_(malloc)/vg_free, since that is self-initialising.
+   VG_(arena_malloc)/VG_(arena_free), since they are self-initialising.
 
    Once Valgrind is initialised, vg_running_on_simd_CPU becomes True.
    The call needs to be transferred from the simulated CPU back to the
@@ -91,15 +90,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to malloc()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to malloc()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__MALLOC, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -116,15 +116,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -147,15 +148,16 @@
                   (UInt)VG_(running_on_simd_CPU), n );
    if (n < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, 
-         "Warning: silly arg (%d) to __builtin_vec_new()", n );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: silly arg (%d) to __builtin_vec_new()", n );
    } else {
       if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; }
 
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_NEW, n);
       } else {
-         v = VG_(malloc)(VG_AR_CLIENT, n);
+         v = VG_(arena_malloc)(VG_AR_CLIENT, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -179,7 +181,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__FREE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);      
+      VG_(arena_free)(VG_AR_CLIENT, p);      
    }
 }
 
@@ -193,7 +195,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -213,7 +215,7 @@
    if (VG_(running_on_simd_CPU)) {
       (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_DELETE, p);
    } else {
-      VG_(free)(VG_AR_CLIENT, p);
+      VG_(arena_free)(VG_AR_CLIENT, p);
    }
 }
 
@@ -232,13 +234,14 @@
                   (UInt)VG_(running_on_simd_CPU), nmemb, size );
    if (nmemb < 0 || size < 0) {
       v = NULL;
-      VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
-                               nmemb, size );
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", 
+                                  nmemb, size );
    } else {
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__CALLOC, nmemb, size);
       } else {
-         v = VG_(calloc)(VG_AR_CLIENT, nmemb, size);
+         v = VG_(arena_calloc)(VG_AR_CLIENT, nmemb, size);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -269,7 +272,7 @@
    if (VG_(running_on_simd_CPU)) {
       v = (void*)SIMPLE_REQUEST2(VG_USERREQ__REALLOC, ptrV, new_size);
    } else {
-      v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size);
+      v = VG_(arena_realloc)(VG_AR_CLIENT, ptrV, /*alignment*/4, new_size);
    }
    if (VG_(clo_trace_malloc)) 
       VG_(printf)(" = %p\n", v );
@@ -292,7 +295,7 @@
       if (VG_(running_on_simd_CPU)) {
          v = (void*)SIMPLE_REQUEST2(VG_USERREQ__MEMALIGN, alignment, n);
       } else {
-         v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n);
+         v = VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, n);
       }
    }
    if (VG_(clo_trace_malloc)) 
@@ -579,7 +582,7 @@
 {
    int res;
    extern void __libc_freeres(void);
-   __libc_freeres();
+   //__libc_freeres();
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__LIBC_FREERES_DONE, 0, 0, 0, 0);
    /*NOTREACHED*/
diff --git a/vg_clientmalloc.c b/vg_clientmalloc.c
index 0292aa4..0959843 100644
--- a/vg_clientmalloc.c
+++ b/vg_clientmalloc.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -38,15 +38,9 @@
 
 /* #define DEBUG_CLIENTMALLOC */
 
-/* Holds malloc'd but not freed blocks. */
+/* Holds malloc'd but not freed blocks.  Static, so zero-inited by default. */
 #define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS)
 static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS];
-static Bool         vg_client_malloc_init_done = False;
-
-/* Holds blocks after freeing. */
-static ShadowChunk* vg_freed_list_start   = NULL;
-static ShadowChunk* vg_freed_list_end     = NULL;
-static Int          vg_freed_list_volume  = 0;
 
 /* Stats ... */
 static UInt         vg_cmalloc_n_mallocs  = 0;
@@ -61,6 +55,105 @@
 /*--- Fns                                                  ---*/
 /*------------------------------------------------------------*/
 
+static __inline__
+Bool needs_shadow_chunks ( void )
+{
+   return VG_(needs).core_errors             ||
+          VG_(needs).alternative_free        ||
+          VG_(needs).sizeof_shadow_block > 0 ||
+          VG_(track_events).bad_free         ||
+          VG_(track_events).mismatched_free  ||
+          VG_(track_events).copy_mem_heap    ||
+          VG_(track_events).die_mem_heap;
+}
+
+#ifdef DEBUG_CLIENTMALLOC
+static 
+Int count_malloclists ( void )
+{
+   ShadowChunk* sc;
+   UInt ml_no;
+   Int  n = 0;
+
+   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
+      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
+         n++;
+   return n;
+}
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks, etc                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate a user-chunk of size bytes.  Also allocate its shadow
+   block, make the shadow block point at the user block.  Put the
+   shadow chunk on the appropriate list, and set all memory
+   protections correctly. */
+static void addShadowChunk ( ThreadState* tst,
+                             Addr p, UInt size, VgAllocKind kind )
+{
+   ShadowChunk* sc;
+   UInt         ml_no = VG_MALLOCLIST_NO(p);
+
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] addShadowChunk "
+               "( sz %d, addr %p, list %d )\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, p, ml_no );
+#  endif
+
+   sc = VG_(arena_malloc)(VG_AR_CORE, 
+                          sizeof(ShadowChunk)
+                           + VG_(needs).sizeof_shadow_block);
+   sc->size      = size;
+   sc->allockind = kind;
+   sc->data      = p;
+   /* Fill in any skin-specific shadow chunk stuff */
+   if (VG_(needs).sizeof_shadow_block > 0)
+      SK_(complete_shadow_chunk) ( sc, tst );
+
+   sc->next  = vg_malloclist[ml_no];
+   vg_malloclist[ml_no] = sc;
+}
+
+/* Get the sc, and return the address of the previous node's next pointer
+   which allows sc to be removed from the list later without having to look
+   it up again.  */
+static ShadowChunk* getShadowChunk ( Addr a, /*OUT*/ShadowChunk*** next_ptr )
+{
+   ShadowChunk *prev, *curr;
+   Int ml_no;
+   
+   ml_no = VG_MALLOCLIST_NO(a);
+
+   prev = NULL;
+   curr = vg_malloclist[ml_no];
+   while (True) {
+      if (curr == NULL) 
+         break;
+      if (a == curr->data)
+         break;
+      prev = curr;
+      curr = curr->next;
+   }
+
+   if (NULL == prev)
+      *next_ptr = &vg_malloclist[ml_no];
+   else
+      *next_ptr = &prev->next;
+
+   return curr;
+}
+
+void VG_(freeShadowChunk) ( ShadowChunk* sc )
+{
+   VG_(arena_free) ( VG_AR_CLIENT, (void*)sc->data );
+   VG_(arena_free) ( VG_AR_CORE,   sc );
+}
+
+
 /* Allocate a suitably-sized array, copy all the malloc-d block
    shadows into it, and return both the array and the size of it.
    This is used by the memory-leak detector.
@@ -78,8 +171,7 @@
    }
    if (*n_shadows == 0) return NULL;
 
-   arr = VG_(malloc)( VG_AR_PRIVATE, 
-                      *n_shadows * sizeof(ShadowChunk*) );
+   arr = VG_(malloc)( *n_shadows * sizeof(ShadowChunk*) );
 
    i = 0;
    for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) {
@@ -91,405 +183,284 @@
    return arr;
 }
 
-static void client_malloc_init ( void )
+Bool VG_(addr_is_in_block)( Addr a, Addr start, UInt size )
+{
+   return (start - VG_AR_CLIENT_REDZONE_SZB <= a
+           && a < start + size + VG_AR_CLIENT_REDZONE_SZB);
+}
+
+/* Return the first shadow chunk satisfying the predicate p. */
+ShadowChunk* VG_(any_matching_mallocd_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
 {
    UInt ml_no;
-   if (vg_client_malloc_init_done) return;
+   ShadowChunk* sc;
+
    for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++)
-      vg_malloclist[ml_no] = NULL;
-   vg_client_malloc_init_done = True;
-}
-
-
-static __attribute__ ((unused))
-       Int count_freelist ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n++;
-   return n;
-}
-
-static __attribute__ ((unused))
-       Int count_malloclists ( void )
-{
-   ShadowChunk* sc;
-   UInt ml_no;
-   Int  n = 0;
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) 
       for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next)
-         n++;
-   return n;
-}
+         if (p(sc))
+            return sc;
 
-static __attribute__ ((unused))
-       void freelist_sanity ( void )
-{
-   ShadowChunk* sc;
-   Int n = 0;
-   /* VG_(printf)("freelist sanity\n"); */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
-      n += sc->size;
-   vg_assert(n == vg_freed_list_volume);
-}
-
-/* Remove sc from malloc list # sc.  It is an unchecked error for
-   sc not to be present in the list. 
-*/
-static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc )
-{
-   ShadowChunk *sc1, *sc2;
-   if (sc == vg_malloclist[ml_no]) {
-      vg_malloclist[ml_no] = vg_malloclist[ml_no]->next;
-   } else {
-      sc1 = vg_malloclist[ml_no];
-      vg_assert(sc1 != NULL);
-      sc2 = sc1->next;
-      while (sc2 != sc) {
-         vg_assert(sc2 != NULL);
-         sc1 = sc2;
-         sc2 = sc2->next;
-      }
-      vg_assert(sc1->next == sc);
-      vg_assert(sc2 == sc);
-      sc1->next = sc2->next;
-   }
+   return NULL;
 }
 
 
-/* Put a shadow chunk on the freed blocks queue, possibly freeing up
-   some of the oldest blocks in the queue at the same time. */
-
-static void add_to_freed_queue ( ShadowChunk* sc )
-{
-   ShadowChunk* sc1;
-
-   /* Put it at the end of the freed list */
-   if (vg_freed_list_end == NULL) {
-      vg_assert(vg_freed_list_start == NULL);
-      vg_freed_list_end = vg_freed_list_start = sc;
-      vg_freed_list_volume = sc->size;
-   } else {
-      vg_assert(vg_freed_list_end->next == NULL);
-      vg_freed_list_end->next = sc;
-      vg_freed_list_end = sc;
-      vg_freed_list_volume += sc->size;
-   }
-   sc->next = NULL;
-
-   /* Release enough of the oldest blocks to bring the free queue
-      volume below vg_clo_freelist_vol. */
-
-   while (vg_freed_list_volume > VG_(clo_freelist_vol)) {
-      /* freelist_sanity(); */
-      vg_assert(vg_freed_list_start != NULL);
-      vg_assert(vg_freed_list_end != NULL);
-
-      sc1 = vg_freed_list_start;
-      vg_freed_list_volume -= sc1->size;
-      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
-      vg_assert(vg_freed_list_volume >= 0);
-
-      if (vg_freed_list_start == vg_freed_list_end) {
-         vg_freed_list_start = vg_freed_list_end = NULL;
-      } else {
-         vg_freed_list_start = sc1->next;
-      }
-      sc1->next = NULL; /* just paranoia */
-      VG_(free)(VG_AR_CLIENT,  (void*)(sc1->data));
-      VG_(free)(VG_AR_PRIVATE, sc1);
-   }
-}
-
-
-/* Allocate a user-chunk of size bytes.  Also allocate its shadow
-   block, make the shadow block point at the user block.  Put the
-   shadow chunk on the appropriate list, and set all memory
-   protections correctly. */
-
-static ShadowChunk* client_malloc_shadow ( ThreadState* tst,
-                                           UInt align, UInt size, 
-                                           VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         ml_no;
-
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
-#  endif
-
-   vg_assert(align >= 4);
-   if (align == 4)
-      p = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   else
-      p = (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size);
-
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = kind;
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_writable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
-   return sc;
-}
-
+/*------------------------------------------------------------*/
+/*--- client_malloc(), etc                                 ---*/
+/*------------------------------------------------------------*/
 
 /* Allocate memory, noticing whether or not we are doing the full
    instrumentation thing. */
-
-void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+static __inline__
+void* alloc_and_new_mem ( ThreadState* tst, UInt size, UInt alignment,
+                          Bool is_zeroed, VgAllocKind kind )
 {
-   ShadowChunk* sc;
+   Addr p;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               size, raw_alloc_kind );
-#  endif
 
    vg_cmalloc_n_mallocs ++;
    vg_cmalloc_bs_mallocd += size;
 
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc) ( VG_AR_CLIENT, size );
-   }
+   vg_assert(alignment >= 4);
+   if (alignment == 4)
+      p = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, size);
+   else
+      p = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, size);
 
-   sc = client_malloc_shadow ( tst, VG_(clo_alignment), size, kind );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   if (needs_shadow_chunks())
+      addShadowChunk ( tst, p, size, kind );
+
+   VG_TRACK( ban_mem_heap, p-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( new_mem_heap, p, size, is_zeroed );
+   VG_TRACK( ban_mem_heap, p+size, VG_AR_CLIENT_REDZONE_SZB );
+
+   VGP_POPCC(VgpCliMalloc);
+   return (void*)p;
+}
+
+void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind )
+{
+   void* p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                                 /*is_zeroed*/False, kind );
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               size, kind, p );
+#  endif
+   return p;
 }
 
 
 void* VG_(client_memalign) ( ThreadState* tst, UInt align, UInt size )
 {
-   ShadowChunk* sc;
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   void* p = alloc_and_new_mem ( tst, size, align, 
+                                 /*is_zeroed*/False, Vg_AllocMalloc );
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d ) = %p\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               align, size );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               align, size, p );
 #  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size );
-   }
-   sc = client_malloc_shadow ( tst, align, size, Vg_AllocMalloc );
-   VGP_POPCC;
-   return (void*)(sc->data);
+   return p;
 }
 
 
-void VG_(client_free) ( ThreadState* tst, void* ptrV, VgAllocKind kind )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, raw_alloc_kind );
-#  endif
-
-   vg_cmalloc_n_frees ++;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      VG_(free) ( VG_AR_CLIENT, ptrV );
-      return;
-   }
-
-   /* first, see if ptrV is one vg_client_malloc gave out. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   vg_mlist_frees++;
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      vg_mlist_tries++;
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      VGP_POPCC;
-      return;
-   }
-
-   /* check if its a matching free() / delete / delete [] */
-   if (kind != sc->allockind)
-      VG_(record_freemismatch_error) ( tst, (Addr) ptrV );
-
-   /* Remove the shadow chunk from the mallocd list. */
-   remove_from_malloclist ( ml_no, sc );
-
-   /* Declare it inaccessible. */
-   VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                         sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-   VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-
-   /* Put it out of harm's way for a while. */
-   add_to_freed_queue ( sc );
-   VGP_POPCC;
-}
-
-
-
 void* VG_(client_calloc) ( ThreadState* tst, UInt nmemb, UInt size1 )
 {
-   ShadowChunk* sc;
-   Addr         p;
-   UInt         size, i, ml_no;
+   void*        p;
+   UInt         size, i;
 
-   VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
+   size = nmemb * size1;
 
-#  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", 
-               count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               nmemb, size1 );
-#  endif
-
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += nmemb * size1;
-
-   if (!VG_(clo_instrument)) {
-      VGP_POPCC;
-      return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 );
-   }
-
-   size      = nmemb * size1;
-   p         = (Addr)VG_(malloc)(VG_AR_CLIENT, size);
-   sc        = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
-   sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
-   sc->size  = size;
-   sc->allockind = Vg_AllocMalloc; /* its a lie - but true. eat this :) */
-   sc->data  = p;
-   ml_no     = VG_MALLOCLIST_NO(p);
-   sc->next  = vg_malloclist[ml_no];
-   vg_malloclist[ml_no] = sc;
-
-   VGM_(make_readable)(p, size);
-   VGM_(make_noaccess)(p + size, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-   VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, 
-                       VG_AR_CLIENT_REDZONE_SZB);
-
+   p = alloc_and_new_mem ( tst, size, VG_(clo_alignment), 
+                              /*is_zeroed*/True, Vg_AllocMalloc );
+   /* Must zero block for calloc! */
    for (i = 0; i < size; i++) ((UChar*)p)[i] = 0;
 
-   VGP_POPCC;
-   return (void*)p;
+#  ifdef DEBUG_CLIENTMALLOC
+   VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d ) = %p\n", 
+               count_malloclists(), 
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               nmemb, size1, p );
+#  endif
+
+   return p;
+}
+
+static
+void die_and_free_mem ( ThreadState* tst, ShadowChunk* sc,
+                        ShadowChunk** prev_chunks_next_ptr )
+{
+   /* Note: ban redzones again -- just in case user de-banned them
+      with a client request... */
+   VG_TRACK( ban_mem_heap, sc->data-VG_AR_CLIENT_REDZONE_SZB, 
+                           VG_AR_CLIENT_REDZONE_SZB );
+   VG_TRACK( die_mem_heap, sc->data, sc->size );
+   VG_TRACK( ban_mem_heap, sc->data+sc->size, VG_AR_CLIENT_REDZONE_SZB );
+
+   /* Remove sc from the malloclist using prev_chunks_next_ptr to
+      avoid repeating the hash table lookup.  Can't remove until at least
+      after free and free_mismatch errors are done because they use
+      describe_addr() which looks for it in malloclist. */
+   *prev_chunks_next_ptr = sc->next;
+
+   if (VG_(needs).alternative_free)
+      SK_(alt_free) ( sc, tst );
+   else
+      VG_(freeShadowChunk) ( sc );
 }
 
 
-void* VG_(client_realloc) ( ThreadState* tst, void* ptrV, UInt size_new )
+void VG_(client_free) ( ThreadState* tst, void* p, VgAllocKind kind )
 {
-   ShadowChunk *sc, *sc_new;
-   UInt         i, ml_no;
+   ShadowChunk*  sc;
+   ShadowChunk** prev_chunks_next_ptr;
 
    VGP_PUSHCC(VgpCliMalloc);
-   client_malloc_init();
 
 #  ifdef DEBUG_CLIENTMALLOC
-   VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", 
+   VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", 
                count_malloclists(), 
-               count_freelist(), vg_freed_list_volume,
-               ptrV, size_new );
+               0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+               p, kind );
 #  endif
 
    vg_cmalloc_n_frees ++;
-   vg_cmalloc_n_mallocs ++;
-   vg_cmalloc_bs_mallocd += size_new;
 
-   if (!VG_(clo_instrument)) {
-      vg_assert(ptrV != NULL && size_new != 0);
-      VGP_POPCC;
-      return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new );
-   }
+   if (! needs_shadow_chunks()) {
+      VG_(arena_free) ( VG_AR_CLIENT, p );
 
-   /* First try and find the block. */
-   ml_no = VG_MALLOCLIST_NO(ptrV);
-   for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-      if ((Addr)ptrV == sc->data)
-         break;
-   }
-  
-   if (sc == NULL) {
-      VG_(record_free_error) ( tst, (Addr)ptrV );
-      /* Perhaps we should keep going regardless. */
-      VGP_POPCC;
-      return NULL;
-   }
-
-   if (sc->allockind != Vg_AllocMalloc) {
-      /* can not realloc a range that was allocated with new or new [] */
-      VG_(record_freemismatch_error) ( tst, (Addr)ptrV );
-      /* but keep going anyway */
-   }
-
-   if (sc->size == size_new) {
-      /* size unchanged */
-      VGP_POPCC;
-      return ptrV;
-   }
-   if (sc->size > size_new) {
-      /* new size is smaller */
-      VGM_(make_noaccess)( sc->data + size_new, 
-                           sc->size - size_new );
-      sc->size = size_new;
-      VGP_POPCC;
-      return ptrV;
    } else {
-      /* new size is bigger */
-      sc_new = client_malloc_shadow ( tst, VG_(clo_alignment), 
-                                      size_new, Vg_AllocMalloc );
-      for (i = 0; i < sc->size; i++)
-         ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i];
-      VGM_(copy_address_range_perms) ( 
-         sc->data, sc_new->data, sc->size );
-      remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc );
-      VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, 
-                            sc->size + 2*VG_AR_CLIENT_REDZONE_SZB );
-      VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) );
-      add_to_freed_queue ( sc );
-      VGP_POPCC;
-      return (void*)sc_new->data;
-   }  
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         VGP_POPCC(VgpCliMalloc);
+         return;
+      }
+
+      /* check if its a matching free() / delete / delete [] */
+      if (kind != sc->allockind)
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+
+      die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+   } 
+   VGP_POPCC(VgpCliMalloc);
 }
 
 
-void VG_(clientmalloc_done) ( void )
+void* VG_(client_realloc) ( ThreadState* tst, void* p, UInt new_size )
+{
+   ShadowChunk  *sc;
+   ShadowChunk **prev_chunks_next_ptr;
+   UInt          i;
+
+   VGP_PUSHCC(VgpCliMalloc);
+
+   vg_cmalloc_n_frees ++;
+   vg_cmalloc_n_mallocs ++;
+   vg_cmalloc_bs_mallocd += new_size;
+
+   if (! needs_shadow_chunks()) {
+      vg_assert(p != NULL && new_size != 0);
+      p = VG_(arena_realloc) ( VG_AR_CLIENT, p, VG_(clo_alignment), 
+                               new_size );
+      VGP_POPCC(VgpCliMalloc);
+      return p;
+
+   } else {
+      /* First try and find the block. */
+      sc = getShadowChunk ( (Addr)p, &prev_chunks_next_ptr );
+
+      if (sc == NULL) {
+         VG_TRACK( bad_free, tst, (Addr)p );
+         /* Perhaps we should return to the program regardless. */
+         VGP_POPCC(VgpCliMalloc);
+         return NULL;
+      }
+     
+      /* check if its a matching free() / delete / delete [] */
+      if (Vg_AllocMalloc != sc->allockind) {
+         /* can not realloc a range that was allocated with new or new [] */
+         VG_TRACK( mismatched_free, tst, (Addr)p );
+         /* but keep going anyway */
+      }
+
+      if (sc->size == new_size) {
+         /* size unchanged */
+         VGP_POPCC(VgpCliMalloc);
+         return p;
+         
+      } else if (sc->size > new_size) {
+         /* new size is smaller */
+         VG_TRACK( die_mem_heap, sc->data+new_size, sc->size-new_size );
+         sc->size = new_size;
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_smaller ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, p );
+#        endif
+         return p;
+
+      } else {
+         /* new size is bigger */
+         Addr p_new;
+         
+         /* Get new memory */
+         vg_assert(VG_(clo_alignment) >= 4);
+         if (VG_(clo_alignment) == 4)
+            p_new = (Addr)VG_(arena_malloc)(VG_AR_CLIENT, new_size);
+         else
+            p_new = (Addr)VG_(arena_malloc_aligned)(VG_AR_CLIENT, 
+                                            VG_(clo_alignment), new_size);
+
+         /* First half kept and copied, second half new, 
+            red zones as normal */
+         VG_TRACK( ban_mem_heap, p_new-VG_AR_CLIENT_REDZONE_SZB, 
+                                 VG_AR_CLIENT_REDZONE_SZB );
+         VG_TRACK( copy_mem_heap, (Addr)p, p_new, sc->size );
+         VG_TRACK( new_mem_heap, p_new+sc->size, new_size-sc->size, 
+                   /*inited=*/False );
+         VG_TRACK( ban_mem_heap, p_new+new_size, VG_AR_CLIENT_REDZONE_SZB );
+
+         /* Copy from old to new */
+         for (i = 0; i < sc->size; i++)
+            ((UChar*)p_new)[i] = ((UChar*)p)[i];
+
+         /* Free old memory */
+         die_and_free_mem ( tst, sc, prev_chunks_next_ptr );
+
+         /* this has to be after die_and_free_mem, otherwise the
+            former succeeds in shorting out the new block, not the
+            old, in the case when both are on the same list.  */
+         addShadowChunk ( tst, p_new, new_size, Vg_AllocMalloc );
+
+         VGP_POPCC(VgpCliMalloc);
+#        ifdef DEBUG_CLIENTMALLOC
+         VG_(printf)("[m %d, f %d (%d)] client_realloc_bigger ( %p, %d ) = %p\n", 
+                     count_malloclists(), 
+                     0/*count_freelist()*/, 0/*vg_freed_list_volume*/,
+                     p, new_size, (void*)p_new );
+#        endif
+         return (void*)p_new;
+      }  
+   }
+}
+
+void VG_(print_malloc_stats) ( void )
 {
    UInt         nblocks, nbytes, ml_no;
    ShadowChunk* sc;
 
-   client_malloc_init();
+   if (VG_(clo_verbosity) == 0)
+      return;
+
+   vg_assert(needs_shadow_chunks());
 
    nblocks = nbytes = 0;
 
@@ -500,9 +471,6 @@
       }
    }
 
-   if (VG_(clo_verbosity) == 0)
-     return;
-
    VG_(message)(Vg_UserMsg, 
                 "malloc/free: in use at exit: %d bytes in %d blocks.",
                 nbytes, nblocks);
@@ -510,9 +478,6 @@
                 "malloc/free: %d allocs, %d frees, %d bytes allocated.",
                 vg_cmalloc_n_mallocs,
                 vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd);
-   if (!VG_(clo_leak_check))
-      VG_(message)(Vg_UserMsg, 
-                   "For a detailed leak analysis,  rerun with: --leak-check=yes");
    if (0)
       VG_(message)(Vg_DebugMsg,
                    "free search: %d tries, %d frees", 
@@ -522,58 +487,6 @@
       VG_(message)(Vg_UserMsg, "");
 }
 
-
-/* Describe an address as best you can, for error messages,
-   putting the result in ai. */
-
-void VG_(describe_addr) ( Addr a, AddrInfo* ai )
-{
-   ShadowChunk* sc;
-   UInt         ml_no;
-   Bool         ok;
-   ThreadId     tid;
-
-   /* Perhaps it's a user-def'd block ? */
-   ok = VG_(client_perm_maybe_describe)( a, ai );
-   if (ok)
-      return;
-   /* Perhaps it's on a thread's stack? */
-   tid = VG_(identify_stack_addr)(a);
-   if (tid != VG_INVALID_THREADID) {
-      ai->akind     = Stack;
-      ai->stack_tid = tid;
-      return;
-   }
-   /* Search for a freed block which might bracket it. */
-   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) {
-      if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-          && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-         ai->akind      = Freed;
-         ai->blksize    = sc->size;
-         ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-         ai->lastchange = sc->where;
-         return;
-      }
-   }
-   /* Search for a mallocd block which might bracket it. */
-   for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) {
-      for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) {
-         if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a
-             && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) {
-            ai->akind      = Mallocd;
-            ai->blksize    = sc->size;
-            ai->rwoffset   = (Int)(a) - (Int)(sc->data);
-            ai->lastchange = sc->where;
-            return;
-         }
-      }
-   }
-   /* Clueless ... */
-   ai->akind = Unknown;
-   return;
-}
-
-
 /*--------------------------------------------------------------------*/
 /*--- end                                        vg_clientmalloc.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/vg_clientperms.c b/vg_clientperms.c
deleted file mode 100644
index e9ecbc4..0000000
--- a/vg_clientperms.c
+++ /dev/null
@@ -1,402 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- For when the client advises Valgrind about permissions.      ---*/
-/*---                                             vg_clientperms.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Valgrind, an x86 protected-mode emulator 
-   designed for debugging and profiling binaries on x86-Unixes.
-
-   Copyright (C) 2000-2002 Julian Seward 
-      jseward@acm.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file LICENSE.
-*/
-
-#include "vg_include.h"
-#include "vg_constants.h"
-
-#include "valgrind.h"  /* for VG_USERREQ__* */
-
-
-/*------------------------------------------------------------*/
-/*--- General client block management.                     ---*/
-/*------------------------------------------------------------*/
-
-/* This is managed as an expanding array of client block descriptors.
-   Indices of live descriptors are issued to the client, so it can ask
-   to free them later.  Therefore we cannot slide live entries down
-   over dead ones.  Instead we must use free/inuse flags and scan for
-   an empty slot at allocation time.  This in turn means allocation is
-   relatively expensive, so we hope this does not happen too often. 
-*/
-
-typedef
-   enum { CG_NotInUse, CG_NoAccess, CG_Writable, CG_Readable }
-   CGenBlockKind;
-
-typedef
-   struct {
-      Addr          start;
-      UInt          size;
-      ExeContext*   where;
-      CGenBlockKind kind;
-   } 
-   CGenBlock;
-
-/* This subsystem is self-initialising. */
-static UInt       vg_cgb_size = 0;
-static UInt       vg_cgb_used = 0;
-static CGenBlock* vg_cgbs     = NULL;
-
-/* Stats for this subsystem. */
-static UInt vg_cgb_used_MAX = 0;   /* Max in use. */
-static UInt vg_cgb_allocs   = 0;   /* Number of allocs. */
-static UInt vg_cgb_discards = 0;   /* Number of discards. */
-static UInt vg_cgb_search   = 0;   /* Number of searches. */
-
-
-static
-Int vg_alloc_client_block ( void )
-{
-   Int        i, sz_new;
-   CGenBlock* cgbs_new;
-
-   vg_cgb_allocs++;
-
-   for (i = 0; i < vg_cgb_used; i++) {
-      vg_cgb_search++;
-      if (vg_cgbs[i].kind == CG_NotInUse)
-         return i;
-   }
-
-   /* Not found.  Try to allocate one at the end. */
-   if (vg_cgb_used < vg_cgb_size) {
-      vg_cgb_used++;
-      return vg_cgb_used-1;
-   }
-
-   /* Ok, we have to allocate a new one. */
-   vg_assert(vg_cgb_used == vg_cgb_size);
-   sz_new = (vg_cgbs == NULL) ? 10 : (2 * vg_cgb_size);
-
-   cgbs_new = VG_(malloc)( VG_AR_PRIVATE, sz_new * sizeof(CGenBlock) );
-   for (i = 0; i < vg_cgb_used; i++) 
-      cgbs_new[i] = vg_cgbs[i];
-
-   if (vg_cgbs != NULL)
-      VG_(free)( VG_AR_PRIVATE, vg_cgbs );
-   vg_cgbs = cgbs_new;
-
-   vg_cgb_size = sz_new;
-   vg_cgb_used++;
-   if (vg_cgb_used > vg_cgb_used_MAX)
-      vg_cgb_used_MAX = vg_cgb_used;
-   return vg_cgb_used-1;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Stack block management.                              ---*/
-/*------------------------------------------------------------*/
-
-/* This is managed as an expanding array of CStackBlocks.  They are
-   packed up against the left-hand end of the array, with no holes.
-   They are kept sorted by the start field, with the [0] having the
-   highest value.  This means it's pretty cheap to put new blocks at
-   the end, corresponding to stack pushes, since the additions put
-   blocks on in what is presumably fairly close to strictly descending
-   order.  If this assumption doesn't hold the performance
-   consequences will be horrible.
-
-   When the client's %ESP jumps back upwards as the result of a RET
-   insn, we shrink the array backwards from the end, in a
-   guaranteed-cheap linear scan.  
-*/
-
-typedef
-   struct {
-      Addr        start;
-      UInt        size;
-      ExeContext* where;
-   } 
-   CStackBlock;
-
-/* This subsystem is self-initialising. */
-static UInt         vg_csb_size = 0;
-static UInt         vg_csb_used = 0;
-static CStackBlock* vg_csbs     = NULL;
-
-/* Stats for this subsystem. */
-static UInt vg_csb_used_MAX = 0;   /* Max in use. */
-static UInt vg_csb_allocs   = 0;   /* Number of allocs. */
-static UInt vg_csb_discards = 0;   /* Number of discards. */
-static UInt vg_csb_swaps    = 0;   /* Number of searches. */
-
-static
-void vg_add_client_stack_block ( ThreadState* tst, Addr aa, UInt sz )
-{
-   UInt i, sz_new;
-   CStackBlock* csbs_new;
-   vg_csb_allocs++;
-
-   /* Ensure there is space for a new block. */
-
-   if (vg_csb_used >= vg_csb_size) {
-
-      /* No; we have to expand the array. */
-      vg_assert(vg_csb_used == vg_csb_size);
-
-      sz_new = (vg_csbs == NULL) ? 10 : (2 * vg_csb_size);
-
-      csbs_new = VG_(malloc)( VG_AR_PRIVATE, sz_new * sizeof(CStackBlock) );
-      for (i = 0; i < vg_csb_used; i++) 
-        csbs_new[i] = vg_csbs[i];
-
-      if (vg_csbs != NULL)
-         VG_(free)( VG_AR_PRIVATE, vg_csbs );
-      vg_csbs = csbs_new;
-
-      vg_csb_size = sz_new;
-   }
-
-   /* Ok, we can use [vg_csb_used]. */
-   vg_csbs[vg_csb_used].start = aa;
-   vg_csbs[vg_csb_used].size  = sz;
-   /* Actually running a thread at this point. */
-   vg_csbs[vg_csb_used].where 
-      = VG_(get_ExeContext) ( False, tst->m_eip, tst->m_ebp );
-   vg_csb_used++;
-
-   if (vg_csb_used > vg_csb_used_MAX)
-      vg_csb_used_MAX = vg_csb_used;
-
-   vg_assert(vg_csb_used <= vg_csb_size);
-
-   /* VG_(printf)("acsb  %p %d\n", aa, sz); */
-   VGM_(make_noaccess) ( aa, sz );
-
-   /* And make sure that they are in descending order of address. */
-   i = vg_csb_used;
-   while (i > 0 && vg_csbs[i-1].start < vg_csbs[i].start) {
-      CStackBlock tmp = vg_csbs[i-1];
-      vg_csbs[i-1] = vg_csbs[i];
-      vg_csbs[i] = tmp;
-      vg_csb_swaps++;
-   }
-
-#  if 1
-   for (i = 1; i < vg_csb_used; i++)
-      vg_assert(vg_csbs[i-1].start >= vg_csbs[i].start);
-#  endif
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Externally visible functions.                        ---*/
-/*------------------------------------------------------------*/
-
-void VG_(show_client_block_stats) ( void )
-{
-   VG_(message)(Vg_DebugMsg, 
-      "general CBs: %d allocs, %d discards, %d maxinuse, %d search",
-      vg_cgb_allocs, vg_cgb_discards, vg_cgb_used_MAX, vg_cgb_search 
-   );
-   VG_(message)(Vg_DebugMsg, 
-      "  stack CBs: %d allocs, %d discards, %d maxinuse, %d swap",
-      vg_csb_allocs, vg_csb_discards, vg_csb_used_MAX, vg_csb_swaps
-   );
-}
-
-
-Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai )
-{
-   Int i;
-   /* VG_(printf)("try to identify %d\n", a); */
-
-   /* First see if it's a stack block.  We do two passes, one exact
-      and one with a bit of slop, so as to try and get the most
-      accurate fix. */
-   for (i = 0; i < vg_csb_used; i++) {
-      if (vg_csbs[i].start <= a
-          && a < vg_csbs[i].start + vg_csbs[i].size) {
-         ai->akind = UserS;
-         ai->blksize = vg_csbs[i].size;
-         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
-         ai->lastchange = vg_csbs[i].where;
-         return True;
-      }
-   }
-
-   /* No exact match on the stack.  Re-do the stack scan with a bit of
-      slop. */
-   for (i = 0; i < vg_csb_used; i++) {
-      if (vg_csbs[i].start - 8 <= a
-          && a < vg_csbs[i].start + vg_csbs[i].size + 8) {
-         ai->akind = UserS;
-         ai->blksize = vg_csbs[i].size;
-         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
-         ai->lastchange = vg_csbs[i].where;
-         return True;
-      }
-   }
-
-   /* No match on the stack.  Perhaps it's a general block ? */
-   for (i = 0; i < vg_cgb_used; i++) {
-      if (vg_cgbs[i].kind == CG_NotInUse) 
-         continue;
-      if (vg_cgbs[i].start - VG_AR_CLIENT_REDZONE_SZB <= a
-          && a < vg_cgbs[i].start 
-                 + vg_cgbs[i].size 
-                 + VG_AR_CLIENT_REDZONE_SZB) {
-         ai->akind = UserG;
-         ai->blksize = vg_cgbs[i].size;
-         ai->rwoffset  = (Int)(a) - (Int)(vg_cgbs[i].start);
-         ai->lastchange = vg_cgbs[i].where;
-         return True;
-      }
-   }
-   return False;
-}
-
-
-void VG_(delete_client_stack_blocks_following_ESP_change) ( void )
-{
-   Addr newESP;
-   newESP = VG_(baseBlock)[VGOFF_(m_esp)];
-   while (vg_csb_used > 0 
-          && vg_csbs[vg_csb_used-1].start + vg_csbs[vg_csb_used-1].size 
-             <= newESP) {
-      vg_csb_used--;
-      vg_csb_discards++;
-      if (VG_(clo_verbosity) > 2)
-         VG_(printf)("discarding stack block %p for %d\n", 
-            (void*)vg_csbs[vg_csb_used].start, 
-            vg_csbs[vg_csb_used].size);
-   }
-}
-
-
-UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block )
-{
-   Int   i;
-   Bool  ok;
-   Addr  bad_addr;
-   UInt* arg = arg_block;
-
-   if (VG_(clo_verbosity) > 2)
-      VG_(printf)("client request: code %d,  addr %p,  len %d\n", 
-                  arg[0], (void*)arg[1], arg[2] );
-
-   switch (arg[0]) {
-      case VG_USERREQ__MAKE_NOACCESS: /* make no access */
-         if (!VG_(clo_instrument))
-            return 0;
-         i = vg_alloc_client_block();
-         /* VG_(printf)("allocated %d %p\n", i, vg_cgbs); */
-         vg_cgbs[i].kind  = CG_NoAccess;
-         vg_cgbs[i].start = arg[1];
-         vg_cgbs[i].size  = arg[2];
-         vg_cgbs[i].where 
-            = VG_(get_ExeContext) ( False, tst->m_eip, tst->m_ebp );
-         VGM_(make_noaccess) ( arg[1], arg[2] );
-         return i;
-      case VG_USERREQ__MAKE_WRITABLE: /* make writable */
-         if (!VG_(clo_instrument))
-            return 0;
-         i = vg_alloc_client_block();
-         vg_cgbs[i].kind  = CG_Writable;
-         vg_cgbs[i].start = arg[1];
-         vg_cgbs[i].size  = arg[2];
-         vg_cgbs[i].where 
-            = VG_(get_ExeContext) ( False, tst->m_eip, tst->m_ebp );
-         VGM_(make_writable) ( arg[1], arg[2] );
-         return i;
-      case VG_USERREQ__MAKE_READABLE: /* make readable */
-         if (!VG_(clo_instrument))
-            return 0;
-         i = vg_alloc_client_block();
-         vg_cgbs[i].kind  = CG_Readable;
-         vg_cgbs[i].start = arg[1];
-         vg_cgbs[i].size  = arg[2];
-         vg_cgbs[i].where 
-            = VG_(get_ExeContext) ( False, tst->m_eip, tst->m_ebp );
-         VGM_(make_readable) ( arg[1], arg[2] );
-         return i;
-
-      case VG_USERREQ__CHECK_WRITABLE: /* check writable */
-         if (!VG_(clo_instrument))
-            return 0;
-         ok = VGM_(check_writable) ( arg[1], arg[2], &bad_addr );
-         if (!ok)
-            VG_(record_user_err) ( tst, bad_addr, True );
-         return ok ? (UInt)NULL : bad_addr;
-      case VG_USERREQ__CHECK_READABLE: /* check readable */
-         if (!VG_(clo_instrument))
-            return 0;
-         ok = VGM_(check_readable) ( arg[1], arg[2], &bad_addr );
-         if (!ok)
-            VG_(record_user_err) ( tst, bad_addr, False );
-         return ok ? (UInt)NULL : bad_addr;
-
-      case VG_USERREQ__DISCARD: /* discard */
-         if (!VG_(clo_instrument))
-            return 0;
-         if (vg_cgbs == NULL 
-             || arg[2] >= vg_cgb_used || vg_cgbs[arg[2]].kind == CG_NotInUse)
-            return 1;
-         vg_assert(arg[2] >= 0 && arg[2] < vg_cgb_used);
-         vg_cgbs[arg[2]].kind = CG_NotInUse;
-         vg_cgb_discards++;
-         return 0;
-
-      case VG_USERREQ__MAKE_NOACCESS_STACK: /* make noaccess stack block */
-         if (!VG_(clo_instrument))
-            return 0;
-         vg_add_client_stack_block ( tst, arg[1], arg[2] );
-         return 0;
-
-      /* Is handled by the scheduler as a trivial request, for
-         performance reasons. */
-      /*
-      case VG_USERREQ__RUNNING_ON_VALGRIND:
-         return 1;
-      */
-
-      case VG_USERREQ__DO_LEAK_CHECK:
-         if (!VG_(clo_instrument))
-            return 0;
-         VG_(detect_memory_leaks)();
-         return 0; /* return value is meaningless */
-
-      case VG_USERREQ__DISCARD_TRANSLATIONS:
-         VG_(invalidate_translations)( arg[1], arg[2] );
-         return 0;  /* return value is meaningless */
-
-      default:
-         VG_(message)(Vg_UserMsg, 
-                      "Warning: unknown client request code %d", arg[0]);
-         return 1;
-   }
-}
-
-
-/*--------------------------------------------------------------------*/
-/*--- end                                         vg_clientperms.c ---*/
-/*--------------------------------------------------------------------*/
diff --git a/vg_constants.h b/vg_constants.h
index d3da14b..abf7240 100644
--- a/vg_constants.h
+++ b/vg_constants.h
@@ -26,30 +26,17 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_CONSTANTS_H
 #define __VG_CONSTANTS_H
 
+#include "vg_constants_skin.h"
 
 /* This file is included in all Valgrind source files, including
    assembly ones. */
 
-/* All symbols externally visible from valgrind.so are prefixed
-   as specified here.  The prefix can be changed, so as to avoid
-   namespace conflict problems.
-*/
-#define VGAPPEND(str1,str2) str1##str2
-
-/* These macros should add different prefixes so the same base
-   name can safely be used across different macros. */
-#define VG_(str)    VGAPPEND(vgPlain_,str)
-#define VGM_(str)   VGAPPEND(vgMem_,str)
-#define VGP_(str)   VGAPPEND(vgProf_,str)
-#define VGOFF_(str) VGAPPEND(vgOff_,str)
-
-
 /* Magic values that %ebp might be set to when returning to the
    dispatcher.  The only other legitimate value is to point to the
    start of VG_(baseBlock).  These also are return values from
@@ -59,13 +46,12 @@
    returns to the dispatch loop.  TRC means that this value is a valid
    thread return code, which the dispatch loop may return to the
    scheduler.  */
-#define VG_TRC_EBP_JMP_STKADJ     17 /* EBP only; handled by dispatcher */
 #define VG_TRC_EBP_JMP_SYSCALL    19 /* EBP and TRC */
 #define VG_TRC_EBP_JMP_CLIENTREQ  23 /* EBP and TRC */
 
-#define VG_TRC_INNER_COUNTERZERO  29  /* TRC only; means bb ctr == 0 */
-#define VG_TRC_INNER_FASTMISS     31  /* TRC only; means fast-cache miss. */
-#define VG_TRC_UNRESUMABLE_SIGNAL 37  /* TRC only; got sigsegv/sigbus */
+#define VG_TRC_INNER_FASTMISS     31 /* TRC only; means fast-cache miss. */
+#define VG_TRC_INNER_COUNTERZERO  29 /* TRC only; means bb ctr == 0 */
+#define VG_TRC_UNRESUMABLE_SIGNAL 37 /* TRC only; got sigsegv/sigbus */
 
 
 /* Debugging hack for assembly code ... sigh. */
@@ -93,7 +79,7 @@
 /* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 
-#endif /* ndef __VG_INCLUDE_H */
+#endif /* ndef __VG_CONSTANTS_H */
 
 /*--------------------------------------------------------------------*/
 /*--- end                                           vg_constants.h ---*/
diff --git a/vg_constants_skin.h b/vg_constants_skin.h
new file mode 100644
index 0000000..a151cb0
--- /dev/null
+++ b/vg_constants_skin.h
@@ -0,0 +1,55 @@
+
+/*--------------------------------------------------------------------*/
+/*--- A header file containing constants (for assembly code).      ---*/
+/*---                                               vg_constants.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_CONSTANTS_SKIN_H
+#define __VG_CONSTANTS_SKIN_H
+
+
+/* All symbols externally visible from valgrind.so are prefixed
+   as specified here.  The prefix can be changed, so as to avoid
+   namespace conflict problems.
+*/
+#define VGAPPEND(str1,str2) str1##str2
+
+/* These macros should add different prefixes so the same base
+   name can safely be used across different macros. */
+#define VG_(str)    VGAPPEND(vgPlain_,str)
+#define VGP_(str)   VGAPPEND(vgProf_,str)
+#define VGOFF_(str) VGAPPEND(vgOff_,str)
+
+/* Skin specific ones.  Note that final name still starts with "vg". */
+#define SK_(str)    VGAPPEND(vgSkin_,str)
+
+#endif /* ndef __VG_CONSTANTS_SKIN_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                           vg_constants.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_corecheck.c b/vg_corecheck.c
new file mode 100644
index 0000000..58568d4
--- /dev/null
+++ b/vg_corecheck.c
@@ -0,0 +1,59 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Skin reporting errors detected in core.       vg_corecheck.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track) 
+{
+   needs->name                    = "coregrind";
+   needs->description             = "a rudimentary error detector";
+
+   needs->core_errors             = True;
+
+   /* No core events to track */
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr a)
+{
+    return cb;
+}
+
+void SK_(fini)(void)
+{
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                           vg_corecheck.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_default.c b/vg_default.c
new file mode 100644
index 0000000..a4b52ea
--- /dev/null
+++ b/vg_default.c
@@ -0,0 +1,249 @@
+/*--------------------------------------------------------------------*/
+/*--- Default panicky definitions of template functions that skins ---*/
+/*--- should override.                                             ---*/
+/*---                                                vg_defaults.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+
+/* These functions aren't intended to be run.  Replacement functions used by
+ * the chosen skin are substituted by compiling the skin into a .so and
+ * LD_PRELOADing it.  Nasty :) */
+
+#include "vg_include.h"
+
+/* ---------------------------------------------------------------------
+   Error messages (for malformed skins)
+   ------------------------------------------------------------------ */
+
+/* If the skin fails to define one or more of the required functions,
+ * make it very clear what went wrong! */
+
+static __attribute__ ((noreturn))
+void fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s',\n"
+      "  which is required.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+static __attribute__ ((noreturn))
+void non_fund_panic ( Char* fn )
+{
+   VG_(printf)(
+      "\nSkin error:\n"
+      "  The skin you have selected is missing the function `%s'\n"
+      "  required by one of its needs.\n\n",
+      fn);
+   VG_(skin_error)("Missing skin function");
+}
+
+/* ---------------------------------------------------------------------
+   Fundamental template functions
+   ------------------------------------------------------------------ */
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   fund_panic("SK_(pre_clo_init)");
+}
+
+void SK_(post_clo_init)(void)
+{
+   fund_panic("SK_(post_clo_init)");
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr not_used)
+{
+   fund_panic("SK_(instrument)");
+}
+
+void SK_(fini)(void)
+{
+   fund_panic("SK_(fini)");
+}
+
+/* ---------------------------------------------------------------------
+   For error reporting and suppression handling
+   ------------------------------------------------------------------ */
+
+Bool SK_(eq_SkinError)(VgRes res, SkinError* e1, SkinError* e2)
+{
+   non_fund_panic("SK_(eq_SkinError)");
+}
+
+void SK_(pp_SkinError)(SkinError* ec, void (*pp_ExeContext)(void))
+{
+   non_fund_panic("SK_(pp_SkinError)");
+}
+
+void SK_(dup_extra_and_update)(SkinError* ec)
+{
+   non_fund_panic("SK_(dup_extra_and_update)");
+}
+
+Bool SK_(recognised_suppression)(Char* name, SuppKind* skind)
+{
+   non_fund_panic("SK_(recognised_suppression)");
+}
+
+Bool SK_(read_extra_suppression_info)(Int fd, Char* buf, 
+                                       Int nBuf, SkinSupp *s)
+{
+   non_fund_panic("SK_(read_extra_suppression_info)");
+}
+
+Bool SK_(error_matches_suppression)(SkinError* ec, SkinSupp* su)
+{
+   non_fund_panic("SK_(error_matches_suppression)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(discard_basic_block_info)(Addr a, UInt size)
+{
+   non_fund_panic("SK_(discard_basic_block_info)");
+}
+
+
+/* ---------------------------------------------------------------------
+   For throwing out basic block level info when code is invalidated
+   ------------------------------------------------------------------ */
+
+void SK_(written_shadow_regs_values)(UInt* gen_reg, UInt* eflags)
+{
+   non_fund_panic("SK_(written_shadow_regs_values)");
+}
+
+
+/* ---------------------------------------------------------------------
+   Command line arg template function
+   ------------------------------------------------------------------ */
+
+Bool SK_(process_cmd_line_option)(Char* argv)
+{
+   non_fund_panic("SK_(process_cmd_line_option)");
+}
+
+Char* SK_(usage)(void)
+{
+   non_fund_panic("SK_(usage)");
+}
+
+/* ---------------------------------------------------------------------
+   Client request template function
+   ------------------------------------------------------------------ */
+
+UInt SK_(handle_client_request)(ThreadState* tst, UInt* arg_block)
+{
+   non_fund_panic("SK_(handle_client_request)");
+}
+
+/* ---------------------------------------------------------------------
+   UCode extension
+   ------------------------------------------------------------------ */
+
+void SK_(emitExtUInstr)(UInstr* u, RRegSet regs_live_before)
+{
+   non_fund_panic("SK_(emitExtUInstr)");
+}
+
+Bool SK_(saneExtUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u)
+{
+   non_fund_panic("SK_(saneExtUInstr)");
+}
+
+Char* SK_(nameExtUOpcode)(Opcode opc)
+{
+   non_fund_panic("SK_(nameExtUOpcode)");
+}
+
+void SK_(ppExtUInstr)(UInstr* u)
+{
+   non_fund_panic("SK_(ppExtUInstr)");
+}
+
+Int SK_(getExtRegUsage)(UInstr* u, Tag tag, RegUse* arr)
+{
+   non_fund_panic("SK_(getExtTempUsage)");
+}
+
+/* ---------------------------------------------------------------------
+   Syscall wrapping
+   ------------------------------------------------------------------ */
+
+void* SK_(pre_syscall)(ThreadId tid, UInt syscallno, Bool is_blocking)
+{
+   non_fund_panic("SK_(pre_syscall)");
+}
+
+void  SK_(post_syscall)(ThreadId tid, UInt syscallno,
+                         void* pre_result, Int res, Bool is_blocking)
+{
+   non_fund_panic("SK_(post_syscall)");
+}
+
+/* ---------------------------------------------------------------------
+   Shadow chunks
+   ------------------------------------------------------------------ */
+
+void SK_(complete_shadow_chunk)( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(complete_shadow_chunk)");
+}
+
+/* ---------------------------------------------------------------------
+   Alternative free()
+   ------------------------------------------------------------------ */
+
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   non_fund_panic("SK_(alt_free)");
+}
+
+/* ---------------------------------------------------------------------
+   Sanity checks
+   ------------------------------------------------------------------ */
+
+Bool SK_(cheap_sanity_check)(void)
+{
+   non_fund_panic("SK_(cheap_sanity_check)");
+}
+
+Bool SK_(expensive_sanity_check)(void)
+{
+   non_fund_panic("SK_(expensive_sanity_check)");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_defaults.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_demangle.c b/vg_demangle.c
index f07f7f3..6dff76f 100644
--- a/vg_demangle.c
+++ b/vg_demangle.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -49,12 +49,14 @@
    Int   n_result  = 0;
    Char* demangled = NULL;
 
+   VGP_PUSHCC(VgpDemangle);
+
    if (VG_(clo_demangle))
       demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
 
    if (demangled) {
       ADD_TO_RESULT(demangled, VG_(strlen)(demangled));
-      VG_(free) (VG_AR_DEMANGLE, demangled);
+      VG_(arena_free) (VG_AR_DEMANGLE, demangled);
    } else {
       ADD_TO_RESULT(orig, VG_(strlen)(orig));
    }
@@ -65,6 +67,8 @@
    vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE));
 
    /* VG_(show_all_arena_stats)(); */
+
+   VGP_POPCC(VgpDemangle);
 }
 
 
diff --git a/vg_dispatch.S b/vg_dispatch.S
index bd1c5b9..7cdb209 100644
--- a/vg_dispatch.S
+++ b/vg_dispatch.S
@@ -1,8 +1,8 @@
 
-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address.       ---##
-##---                                                vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                                vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
 
 /*
   This file is part of Valgrind, an x86 protected-mode emulator 
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -59,9 +59,9 @@
 	
 .globl VG_(run_innerloop)
 VG_(run_innerloop):
-	#OYNK(1000)
+	/* OYNK(1000) */
 
-	# ----- entry point to VG_(run_innerloop) -----
+	/* ----- entry point to VG_(run_innerloop) ----- */
 	pushl	%ebx
 	pushl	%ecx
 	pushl	%edx
@@ -69,74 +69,98 @@
 	pushl	%edi
 	pushl	%ebp
 
-	# Set up the baseBlock pointer
+	/* Set up the baseBlock pointer */
 	movl	$VG_(baseBlock), %ebp
 
-	# fetch m_eip into %eax
+	/* fetch m_eip into %eax */
 	movl	VGOFF_(m_eip), %esi
 	movl	(%ebp, %esi, 4), %eax
 	
-	# Start off dispatching paranoically, since we no longer have
-	# any indication whether or not this might be a special call/ret
-	# transfer.
-	jmp	dispatch_stkadj
-	
-	
 dispatch_main:
-	# Jump here to do a new dispatch.
-	# %eax holds destination (original) address.
-	# %ebp indicates further details of the control transfer
-	# requested to the address in %eax.
-	#
-	# If ebp == & VG_(baseBlock), just jump next to %eax.
-	# 
-	# If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
-	# continuing at eax.
-	#
-	# If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
-	# continuing at eax.
-	#
-	# If %ebp has any other value, we panic.
+	/* Jump here to do a new dispatch.
+	   %eax holds destination (original) address.
+	   %ebp indicates further details of the control transfer
+	   requested to the address in %eax.
 	
+	   If ebp == & VG_(baseBlock), just jump next to %eax.
+	 
+	   If ebp == VG_EBP_JMP_SYSCALL, do a system call before 
+	   continuing at eax.
+	
+	   If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before 
+	   continuing at eax.
+	
+	   If %ebp has any other value, we panic.
+	*/
+	cmpl	$VG_(baseBlock), %ebp
+	jnz	dispatch_exceptional
+	/* fall into main loop */
+
+
+dispatch_boring:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
+	movl	VGOFF_(m_eip), %esi
+	movl	%eax, (%ebp, %esi, 4)
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+	decl	VG_(dispatch_ctr)
+	jz	counter_is_zero
+	/* try a fast lookup in the translation cache */
+	movl	%eax, %ebx
+	andl	$VG_TT_FAST_MASK, %ebx	
+	/* ebx = tt_fast index */
+	movl	VG_(tt_fast)(,%ebx,4), %ebx	
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
+	cmpl	%eax, (%ebx)
+	jnz	fast_lookup_failed
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
+	movl	VG_(current_epoch), %ecx
+	movl	%ecx, 8(%ebx)
+#endif
+	call	*4(%ebx)
 	cmpl	$VG_(baseBlock), %ebp
 	jnz	dispatch_exceptional
 
-dispatch_boring:
-	# save the jump address at VG_(baseBlock)[VGOFF_(m_eip)],
+dispatch_boring_unroll2:
+	/* save the jump address at VG_(baseBlock)[VGOFF_(m_eip)] */
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
-	
-	# do a timeslice check.
-	# are we out of timeslice?  If yes, defer to scheduler.
-	#OYNK(1001)
+#if 1
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
 	decl	VG_(dispatch_ctr)
 	jz	counter_is_zero
-
-	#OYNK(1002)
-	# try a fast lookup in the translation cache
+#endif
+	/* try a fast lookup in the translation cache */
 	movl	%eax, %ebx
 	andl	$VG_TT_FAST_MASK, %ebx	
-	# ebx = tt_fast index
+	/* ebx = tt_fast index */
 	movl	VG_(tt_fast)(,%ebx,4), %ebx	
-	# ebx points at a tt entry
-	# now compare target with the tte.orig_addr field (+0)
+	/* ebx points at a tt entry
+	   now compare target with the tte.orig_addr field (+0) */
 	cmpl	%eax, (%ebx)
 	jnz	fast_lookup_failed
-
-	# Found a match.  Set the tte.mru_epoch field (+8)
-	# and call the tte.trans_addr field (+4)
+#if 1
+	/* Found a match.  Set the tte.mru_epoch field (+8)
+	   and call the tte.trans_addr field (+4) */
 	movl	VG_(current_epoch), %ecx
 	movl	%ecx, 8(%ebx)
+#endif
 	call	*4(%ebx)
-	jmp	dispatch_main
+	cmpl	$VG_(baseBlock), %ebp
+	jz	dispatch_boring
+
+	jmp	dispatch_exceptional
+
 	
 fast_lookup_failed:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_FASTMISS, %eax
 	jmp	run_innerloop_exit
 
 counter_is_zero:
-	# %EIP is up to date here since dispatch_boring dominates
+	/* %EIP is up to date here since dispatch_boring dominates */
 	movl	$VG_TRC_INNER_COUNTERZERO, %eax
 	jmp	run_innerloop_exit
 	
@@ -155,21 +179,19 @@
    make it look cleaner. 
 */
 dispatch_exceptional:
-	# this is jumped to only, not fallen-through from above
-	cmpl	$VG_TRC_EBP_JMP_STKADJ, %ebp
-	jz	dispatch_stkadj
+	/* this is jumped to only, not fallen-through from above */
 	cmpl	$VG_TRC_EBP_JMP_SYSCALL, %ebp
 	jz	dispatch_syscall
 	cmpl	$VG_TRC_EBP_JMP_CLIENTREQ, %ebp
 	jz	dispatch_clientreq
 
-	# ebp has an invalid value ... crap out.
+	/* ebp has an invalid value ... crap out. */
 	pushl	$panic_msg_ebp
 	call	VG_(panic)
-	#	(never returns)
+	/* (never returns) */
 
 dispatch_syscall:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
@@ -177,29 +199,13 @@
 	jmp	run_innerloop_exit
 	
 dispatch_clientreq:
-	# save %eax in %EIP and defer to sched
+	/* save %eax in %EIP and defer to sched */
 	movl	$VG_(baseBlock), %ebp
 	movl	VGOFF_(m_eip), %esi
 	movl	%eax, (%ebp, %esi, 4)
 	movl	$VG_TRC_EBP_JMP_CLIENTREQ, %eax
 	jmp	run_innerloop_exit
 
-dispatch_stkadj:
-	# save %eax in %EIP
-	movl	$VG_(baseBlock), %ebp
-	movl	VGOFF_(m_eip), %esi
-	movl	%eax, (%ebp, %esi, 4)
-
-	# see if we need to mess with stack blocks
-	pushl	%eax
-	call	VG_(delete_client_stack_blocks_following_ESP_change)
-	popl	%eax
-	movl	$VG_(baseBlock), %ebp
-		
-	# ok, its not interesting.  Handle the normal way.
-	jmp	dispatch_boring
-
-
 .data
 panic_msg_ebp:
 .ascii	"vg_dispatch: %ebp has invalid value!"
@@ -207,6 +213,6 @@
 .text	
 
 
-##--------------------------------------------------------------------##
-##--- end                                            vg_dispatch.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_dispatch.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_dummy_profile.c b/vg_dummy_profile.c
new file mode 100644
index 0000000..2f869c9
--- /dev/null
+++ b/vg_dummy_profile.c
@@ -0,0 +1,67 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Dummy profiling machinery -- overridden by skins when they   ---*/
+/*--- want profiling.                                              ---*/
+/*---                                           vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+
+void VGP_(register_profile_event) ( Int n, Char* name )
+{
+}
+
+void VGP_(init_profiling) ( void )
+{
+   VG_(printf)(
+      "\nProfiling error:\n"
+      "  The --profile=yes option was specified, but the skin\n"
+      "  wasn't built for profiling.  #include \"vg_profile.c\"\n"
+      "  into the skin and rebuild to allow profiling.\n\n");
+   VG_(exit)(1);
+}
+
+void VGP_(done_profiling) ( void )
+{
+   VG_(panic)("done_profiling");
+}
+
+void VGP_(pushcc) ( UInt cc )
+{
+   VG_(panic)("pushcc");
+}
+
+void VGP_(popcc) ( UInt cc )
+{
+   VG_(panic)("popcc");
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                       vg_dummy_profile.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_eraser.c b/vg_eraser.c
new file mode 100644
index 0000000..43e46bc
--- /dev/null
+++ b/vg_eraser.c
@@ -0,0 +1,1415 @@
+/*--------------------------------------------------------------------*/
+/*--- The Eraser skin: checking for data races in threaded         ---*/
+/*--- programs.                                                    ---*/
+/*---                                                  vg_eraser.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+static UInt n_eraser_warnings = 0;
+
+
+/*------------------------------------------------------------*/
+/*--- Debug guff                                           ---*/
+/*------------------------------------------------------------*/
+
+#define DEBUG_LOCK_TABLE    1   /* Print lock table at end */
+
+#define DEBUG_MAKE_ACCESSES 0   /* Print make_access() calls */
+#define DEBUG_LOCKS         0   /* Print lock()/unlock() calls and locksets */
+#define DEBUG_NEW_LOCKSETS  0   /* Print new locksets when created */
+#define DEBUG_ACCESSES      0   /* Print reads, writes */
+#define DEBUG_MEM_LOCKSET_CHANGES 0
+                                /* Print when an address's lockset
+                                   changes; only useful with
+                                   DEBUG_ACCESSES */
+
+#define DEBUG_VIRGIN_READS  0   /* Dump around address on VIRGIN reads */
+
+/* heavyweight LockSet sanity checking:
+   0 == never
+   1 == after important ops
+   2 == As 1 and also after pthread_mutex_* ops (excessively slow)
+ */
+#define LOCKSET_SANITY 0
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+// PPP: work out if I want this
+
+#define PROF_EVENT(x)
+#if 0
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+static UInt event_ctr[N_PROF_EVENTS];
+
+void VGE_(done_prof_mem) ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0)
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False);
+
+#else
+
+//static void init_prof_mem ( void ) { }
+//       void VG_(done_prof_mem) ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif /* VG_PROFILE_MEMORY */
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   [PPP: snip event numbers...]
+*/
+#endif /* 0 */
+
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+typedef enum 
+   { Vge_VirginInit, Vge_NonVirginInit, Vge_SegmentInit } 
+   VgeInitStatus;
+
+/* Should add up to 32 to fit in one word */
+#define OTHER_BITS      30
+#define STATE_BITS      2
+
+#define ESEC_MAP_WORDS  16384   /* Words per secondary map */
+
+/* This is for indicating that a memory block has been initialised but not
+ * really directly by a particular thread... (eg. text/data initialised
+ * automatically at startup).
+ * Must be different to virgin_word.other */
+#define TID_INDICATING_NONVIRGIN    1
+
+/* Number of entries must fit in STATE_BITS bits */
+typedef enum { Vge_Virgin, Vge_Excl, Vge_Shar, Vge_SharMod } pth_state;
+
+typedef
+   struct {
+      UInt other:OTHER_BITS;
+      UInt state:STATE_BITS;
+   } shadow_word;
+
+typedef
+   struct {
+      shadow_word swords[ESEC_MAP_WORDS];
+   }
+   ESecMap;
+
+static ESecMap* primary_map[ 65536 ];
+static ESecMap  distinguished_secondary_map;
+
+static shadow_word virgin_sword = { 0, Vge_Virgin };
+
+#define VGE_IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+#define ENSURE_MAPPABLE(addr,caller)                                  \
+   do {                                                               \
+      if (VGE_IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller);     \
+         /*VG_(printf)("new 2map because of %p\n", addr);*/           \
+      } \
+   } while(0)
+
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory tracking.               ---*/
+/*------------------------------------------------------------*/
+
+/*
+   All reads and writes are recorded in the memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like that for normal Valgrind, except each that everything
+   is done at word-level instead of byte-level, and each word has only
+   one word of shadow (instead of 36 bits).  
+
+   As for normal Valgrind there is a distinguished secondary map.  But we're
+   working at word-granularity, so it has 16k word entries instead of 64k byte
+   entries.  Lookup is done as follows:
+
+     bits 31..16:   primary map lookup
+     bits 15.. 2:   secondary map lookup
+     bits  1.. 0:   ignored
+*/
+
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map, marking all words as virgin. */
+
+/* Just a value that isn't a real pointer */
+#define SEC_MAP_ACCESS  (shadow_word*)0x99    
+
+
+static 
+ESecMap* alloc_secondary_map ( __attribute__ ((unused)) Char* caller )
+{
+   ESecMap* map;
+   UInt  i;
+   //PROF_EVENT(10); PPP
+
+   /* It just happens that a SecMap occupies exactly 18 pages --
+      although this isn't important, so the following assert is
+      spurious. (SSS: not true for ESecMaps -- they're 16 pages) */
+   vg_assert(0 == (sizeof(ESecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(ESecMap), caller );
+
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      map->swords[i] = virgin_sword;
+
+   return map;
+}
+
+
+/* Set a word.  The byte give by 'a' could be anywhere in the word -- the whole
+ * word gets set. */
+static __inline__ 
+void set_sword ( Addr a, shadow_word sword )
+{
+   ESecMap* sm;
+
+   //PROF_EVENT(23); PPP
+   ENSURE_MAPPABLE(a, "VGE_(set_sword)");
+
+   /* Use bits 31..16 for primary, 15..2 for secondary lookup */
+   sm     = primary_map[a >> 16];
+   vg_assert(sm != &distinguished_secondary_map);
+   sm->swords[(a & 0xFFFC) >> 2] = sword;
+
+   if (VGE_IS_DISTINGUISHED_SM(sm)) {
+      VG_(printf)("wrote to distinguished 2ndary map! 0x%x\n", a);
+      // XXX: may be legit, but I want to know when it happens --njn
+      VG_(panic)("wrote to distinguished 2ndary map!");
+   }
+}
+
+
+static __inline__ 
+shadow_word* get_sword_addr ( Addr a )
+{
+   /* Use bits 31..16 for primary, 15..2 for secondary lookup */
+   ESecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = (a & 0xFFFC) >> 2;
+
+   if (VGE_IS_DISTINGUISHED_SM(sm)) {
+      VG_(printf)("accessed distinguished 2ndary map! 0x%x\n", a);
+      // XXX: may be legit, but I want to know when it happens --njn
+      //VG_(panic)("accessed distinguished 2ndary map!");
+      return SEC_MAP_ACCESS;
+   }
+
+   //PROF_EVENT(21); PPP
+   return & (sm->swords[sm_off]);
+}
+
+
+// SSS: rename these so they're not so similar to memcheck, unless it's
+// appropriate of course
+
+static __inline__ 
+void init_virgin_sword(Addr a)
+{
+   set_sword(a, virgin_sword);
+}
+
+
+/* 'a' is guaranteed to be 4-byte aligned here (not that that's important,
+ * really) */
+static 
+void make_writable_aligned ( Addr a, UInt size )
+{
+   Addr a_past_end = a + size;
+
+   //PROF_EVENT(??)  PPP
+   vg_assert(IS_ALIGNED4_ADDR(a));
+
+   for ( ; a < a_past_end; a += 4) {
+      set_sword(a, virgin_sword);
+   }
+}
+
+static __inline__ 
+void init_nonvirgin_sword(Addr a)
+{
+   shadow_word sword;
+
+   sword.other = VG_(get_current_tid_1_if_root)();
+   sword.state = Vge_Excl;
+   set_sword(a, sword);
+}
+
+
+/* In this case, we treat it for Eraser's sake like virgin (it hasn't
+ * been inited by a particular thread, it's just done automatically upon
+ * startup), but we mark its .state specially so it doesn't look like an 
+ * uninited read. */
+static __inline__ 
+void init_magically_inited_sword(Addr a)
+{
+   shadow_word sword;
+
+   vg_assert(1 == VG_(get_current_tid_1_if_root)());
+   sword.other = TID_INDICATING_NONVIRGIN;
+   sword.state = Vge_Virgin;
+   set_sword(a, virgin_sword);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Implementation of lock sets.                         ---*/
+/*------------------------------------------------------------*/
+
+#define M_LOCKSET_TABLE 1000
+
+#include <pthread.h>
+
+typedef 
+   struct _LockSet {
+       pthread_mutex_t* mutex;
+       struct _LockSet* next;
+   } LockSet;
+
+
+/* Each one is an index into the lockset table. */
+static UInt thread_locks[VG_N_THREADS];
+
+/* # lockset table entries used. */
+static Int n_lockset_table = 1; 
+
+/* lockset_table[0] is always NULL, representing the empty lockset */
+static LockSet* lockset_table[M_LOCKSET_TABLE];
+
+
+static __inline__
+Bool is_valid_lockset_id ( Int id )
+{
+   return id >= 0 && id < n_lockset_table;
+}
+
+
+static
+Int allocate_LockSet(LockSet* set)
+{
+   if (n_lockset_table >= M_LOCKSET_TABLE) 
+      VG_(panic)("lockset table full -- increase M_LOCKSET_TABLE");
+   lockset_table[n_lockset_table] = set;
+   n_lockset_table++;
+#  if DEBUG_MEM_LOCKSET_CHANGES || DEBUG_NEW_LOCKSETS
+   VG_(printf)("allocate LOCKSET VECTOR %p to %d\n", set, n_lockset_table-1);
+#  endif
+   return n_lockset_table-1;
+}
+
+
+static 
+void pp_LockSet(LockSet* p)
+{
+   VG_(printf)("{ ");
+   while (p != NULL) {
+      VG_(printf)("%x ", p->mutex);
+      p = p->next;
+   }
+   VG_(printf)("}\n");
+}
+
+
+static __attribute__((unused))
+void pp_all_LockSets ( void )
+{
+   Int i;
+   for (i = 0; i < n_lockset_table; i++) {
+      VG_(printf)("[%d] = ", i);
+      pp_LockSet(lockset_table[i]);
+   }
+}
+
+
+static 
+void free_LockSet(LockSet *p)
+{
+   LockSet* q;
+   while (NULL != p) {
+      q = p;
+      p = p->next;
+      VG_(free)(q);
+#     if DEBUG_MEM_LOCKSET_CHANGES
+      VG_(printf)("free'd   %x\n", q);
+#     endif
+   }
+}
+
+
+static 
+Bool structural_eq_LockSet(LockSet* a, LockSet* b)
+{
+   while (a && b) {
+      if (a->mutex != b->mutex) {
+         return False;
+      }
+      a = a->next;
+      b = b->next;
+   }
+   return (NULL == a && NULL == b);
+}
+
+
+#if LOCKSET_SANITY 
+/* Check invariants:
+   - all locksets are unique
+   - each set is a linked list in strictly increasing order of mutex addr 
+*/
+static
+void sanity_check_locksets ( Char* caller )
+{
+   Int              i, j, badness;
+   LockSet*         v;
+   pthread_mutex_t* mx_prev;
+
+   badness = 0;
+   i = j = -1;
+
+   //VG_(printf)("sanity %s\n", caller);
+   /* Check really simple things first */
+
+   if (n_lockset_table < 1 || n_lockset_table > M_LOCKSET_TABLE)
+      { badness = 1; goto baaad; }
+
+   if (lockset_table[0] != NULL)
+      { badness = 2; goto baaad; }
+
+   for (i = 1; i < n_lockset_table; i++)
+      if (lockset_table[i] == NULL)
+         { badness = 3; goto baaad; }
+
+   for (i = n_lockset_table; i < M_LOCKSET_TABLE; i++)
+      if (lockset_table[i] != NULL)
+         { badness = 4; goto baaad; }
+
+   /* Check the sanity of each individual set. */
+   for (i = 1; i < n_lockset_table; i++) {
+      v = lockset_table[i];
+      mx_prev = (pthread_mutex_t*)0;
+      while (True) {
+         if (v == NULL) break;
+         if (mx_prev >= v->mutex) 
+            { badness = 5; goto baaad; }
+         mx_prev = v->mutex;
+         v = v->next;
+      }
+   }
+
+   /* Ensure the sets are unique, both structurally and in respect of
+      the address of their first nodes. */
+   for (i = 1; i < n_lockset_table; i++) {
+      for (j = i+1; j < n_lockset_table; j++) {
+         if (lockset_table[i] == lockset_table[j]) 
+            { badness = 6; goto baaad; }
+         if (structural_eq_LockSet(lockset_table[i], lockset_table[j])) 
+            { badness = 7; goto baaad; }
+      }
+   }
+   return;
+
+  baaad:
+   VG_(printf)("sanity_check_locksets: "
+               "i = %d, j = %d, badness = %d, caller = %s\n", 
+               i, j, badness, caller);
+   pp_all_LockSets();
+   VG_(panic)("sanity_check_locksets");
+}
+#endif /* LOCKSET_SANITY */
+
+
+/* Builds ia with mx removed.  mx should actually be in ia! 
+   (a checked assertion).  Resulting set should not already
+   exist in the table (unchecked).
+*/
+static 
+UInt remove ( UInt ia, pthread_mutex_t* mx )
+{
+   Int       found, res;
+   LockSet*  new_vector = NULL;
+   LockSet*  new_node;
+   LockSet** prev_ptr = &new_vector;
+   LockSet*  a = lockset_table[ia];
+   vg_assert(is_valid_lockset_id(ia));
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   VG_(printf)("Removing from %d mutex %p:\n", ia, mx);
+#  endif
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   print_LockSet(a);
+#  endif
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("remove-IN");
+#  endif
+
+   /* Build the intersection of the two lists */
+   found = 0;
+   while (a) {
+      if (a->mutex != mx) {
+         new_node = VG_(malloc)(sizeof(LockSet));
+#        if DEBUG_MEM_LOCKSET_CHANGES
+         VG_(printf)("malloc'd %x\n", new_node);
+#        endif
+         new_node->mutex = a->mutex;
+         *prev_ptr = new_node;
+         prev_ptr = &((*prev_ptr)->next);
+         a = a->next;
+      } else {
+         found++;
+      }
+      *prev_ptr = NULL;
+   }
+   vg_assert(found == 1 /* sigh .. if the client is buggy */ || found == 0 );
+
+   /* Preserve uniqueness invariants in face of client buggyness */
+   if (found == 0) {
+      free_LockSet(new_vector);
+      return ia;
+   }
+
+   /* Add to the table. */
+   res = allocate_LockSet(new_vector);
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("remove-OUT");
+#  endif
+
+   return res;
+}
+
+
+/* Tricky: equivalent to (compare(insert(missing_elem, a), b)), but
+ * doesn't do the insertion.  Returns True if they match.
+ */
+static Bool 
+weird_LockSet_equals(LockSet* a, LockSet* b, 
+                     pthread_mutex_t* missing_mutex)
+{
+   /* Idea is to try and match each element of b against either an
+      element of a, or missing_mutex. */
+   while (True) {
+      if (b == NULL) 
+         break;
+      /* deal with missing already being in a */
+      if (a && a->mutex == missing_mutex) 
+         a = a->next;
+      /* match current b element either against a or missing */
+      if (b->mutex == missing_mutex) {
+         b = b->next;
+         continue;
+      }
+      /* wasn't == missing, so have to match from a, or fail */
+      if (a && b->mutex == a->mutex) {
+         a = a->next;
+         b = b->next;
+         continue;
+      }
+      break;
+   }
+   return (b==NULL ? True : False);
+}
+
+
+/* Builds the intersection, and then unbuilds it if it's already in the table.
+ */
+static UInt intersect(UInt ia, UInt ib)
+{
+   Int       i;
+   LockSet*  a = lockset_table[ia];
+   LockSet*  b = lockset_table[ib];
+   LockSet*  new_vector = NULL;
+   LockSet*  new_node;
+   LockSet** prev_ptr = &new_vector;
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   VG_(printf)("Intersecting %d %d:\n", ia, ib);
+#  endif
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("intersect-IN");
+#  endif
+
+   /* Fast case -- when the two are the same */
+   if (ia == ib) {
+#     if DEBUG_MEM_LOCKSET_CHANGES
+      VG_(printf)("Fast case -- both the same: %u\n", ia);
+      print_LockSet(a);
+#     endif
+      return ia;
+   }
+
+#  if DEBUG_MEM_LOCKSET_CHANGES
+   print_LockSet(a);
+   print_LockSet(b);
+#  endif
+
+   /* Build the intersection of the two lists */
+   while (a && b) {
+      if (a->mutex == b->mutex) {
+         new_node = VG_(malloc)(sizeof(LockSet));
+#        if DEBUG_MEM_LOCKSET_CHANGES
+         VG_(printf)("malloc'd %x\n", new_node);
+#        endif
+         new_node->mutex = a->mutex;
+         *prev_ptr = new_node;
+         prev_ptr = &((*prev_ptr)->next);
+         a = a->next;
+         b = b->next;
+      } else if (a->mutex < b->mutex) {
+         a = a->next;
+      } else if (a->mutex > b->mutex) {
+         b = b->next;
+      } else VG_(panic)("STOP PRESS: Laws of arithmetic broken");
+
+      *prev_ptr = NULL;
+   }
+
+   /* Now search for it in the table, adding it if not seen before */
+   for (i = 0; i < n_lockset_table; i++) {
+      if (structural_eq_LockSet(lockset_table[i], new_vector))
+         break;
+   }
+
+   if (i == n_lockset_table) {
+     i = allocate_LockSet(new_vector);
+   } else {
+     free_LockSet(new_vector);
+   }
+
+   /* Check we won't overflow the OTHER_BITS bits of sword->other */
+   vg_assert(i < (1 << OTHER_BITS));
+
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("intersect-OUT");
+#  endif
+
+   return i;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setting and checking permissions.                    ---*/
+/*------------------------------------------------------------*/
+
+static
+void set_address_range_state ( Addr a, UInt len /* in bytes */, 
+                               VgeInitStatus status )
+{
+   Addr aligned_a, end, aligned_end;
+
+#  if DEBUG_MAKE_ACCESSES
+   VG_(printf)("make_access: 0x%x, %u, status=%u\n", a, len, status);
+#  endif
+   //PROF_EVENT(30); PPP
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: set address range state: large range %d",
+                   len);
+
+   VGP_PUSHCC(VgpSARP);
+
+   /* Memory block may not be aligned or a whole word multiple.  In neat cases,
+    * we have to init len/4 words (len is in bytes).  In nasty cases, it's
+    * len/4+1 words.  This works out which it is by aligning the block and
+    * seeing if the end byte is in the same word as it is for the unaligned
+    * block; if not, it's the awkward case. */
+   aligned_a   = a & 0xc;                       /* zero bottom two bits */
+   end         = a + len;
+   aligned_end = aligned_a + len;
+   if ((end & 0xc) != (aligned_end & 0xc)) {
+       end += 4;    /* len/4 + 1 case */
+   }
+
+   /* Do it ... */
+   switch (status) {
+   case Vge_VirginInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_virgin_sword(a);
+      }
+      break;
+
+   case Vge_NonVirginInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_nonvirgin_sword(a);
+      }
+      break;
+
+   case Vge_SegmentInit:
+      for ( ; a < end; a += 4) {
+         //PROF_EVENT(31);  PPP
+         init_magically_inited_sword(a);
+      }
+      break;
+   
+   default:
+      VG_(printf)("init_status = %u\n", status);
+      VG_(panic)("Unexpected Vge_InitStatus");
+   }
+      
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with
+      __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSARP);
+}
+
+
+static void make_segment_readable ( Addr a, UInt len )
+{
+   //PROF_EVENT(??);    PPP
+   set_address_range_state ( a, len, Vge_SegmentInit );
+}
+
+static void make_writable ( Addr a, UInt len )
+{
+   //PROF_EVENT(36);  PPP
+   set_address_range_state( a, len, Vge_VirginInit );
+}
+
+static void make_readable ( Addr a, UInt len )
+{
+   //PROF_EVENT(37);  PPP
+   set_address_range_state( a, len, Vge_NonVirginInit );
+}
+
+
+// SSS: change name
+/* Block-copy states (needed for implementing realloc()). */
+static void copy_address_range_state(Addr src, Addr dst, UInt len)
+{
+   UInt i;
+
+   //PROF_EVENT(40); PPP
+   for (i = 0; i < len; i += 4) {
+      shadow_word sword = *(get_sword_addr ( src+i ));
+      //PROF_EVENT(41);  PPP
+      set_sword ( dst+i, sword );
+   }
+}
+
+// SSS: put these somewhere better
+static void eraser_mem_read (Addr a, UInt data_size);
+static void eraser_mem_write(Addr a, UInt data_size);
+
+static
+void eraser_pre_mem_read(CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   eraser_mem_read(base, size);
+}
+
+static
+void eraser_pre_mem_read_asciiz(CorePart part, ThreadState* tst,
+                                Char* s, UInt base )
+{
+   eraser_mem_read(base, VG_(strlen)((Char*)base));
+}
+
+static
+void eraser_pre_mem_write(CorePart part, ThreadState* tst,
+                          Char* s, UInt base, UInt size )
+{
+   eraser_mem_write(base, size);
+}
+
+
+
+static
+void eraser_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   make_segment_readable(a, len);
+}
+
+
+static
+void eraser_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   if (is_inited) {
+      make_readable(a, len);
+   } else {
+      make_writable(a, len);
+   }
+}
+
+static
+void eraser_set_perms (Addr a, UInt len,
+                       Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   if      (rr) make_readable(a, len);
+   else if (ww) make_writable(a, len);
+   /* else do nothing */
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Initialise the memory audit system on program startup. ---*/
+/*--------------------------------------------------------------*/
+
+static 
+void init_shadow_memory(void)
+{
+   Int i;
+
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      distinguished_secondary_map.swords[i] = virgin_sword;
+
+   /* These entries gradually get overwritten as the used address
+      space expands. */
+   for (i = 0; i < 65536; i++)
+      primary_map[i] = &distinguished_secondary_map;
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Machinery to support sanity checking                   ---*/
+/*--------------------------------------------------------------*/
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+Bool SK_(cheap_sanity_check) ( void )
+{
+   if (VGE_IS_DISTINGUISHED_SM(primary_map[0]) && 
+       VGE_IS_DISTINGUISHED_SM(primary_map[65535]))
+      return True;
+   else
+      return False;
+}
+
+
+Bool SK_(expensive_sanity_check)(void)
+{
+   Int i;
+
+   /* Make sure nobody changed the distinguished secondary. */
+   for (i = 0; i < ESEC_MAP_WORDS; i++)
+      if (distinguished_secondary_map.swords[i].other != virgin_sword.other ||
+          distinguished_secondary_map.swords[i].state != virgin_sword.state)
+         return False;
+
+   return True;
+}
+
+
+/*--------------------------------------------------------------*/
+/*--- Instrumentation                                        ---*/
+/*--------------------------------------------------------------*/
+
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+
+/* Create and return an instrumented version of cb_in.  Free cb_in
+   before returning. */
+UCodeBlock* SK_(instrument) ( UCodeBlock* cb_in, Addr not_used )
+{
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u_in;
+   Int         t_size = INVALID_TEMPREG;
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+      u_in = &cb_in->instrs[i];
+
+      /* VG_(ppUInstr)(0, u_in); */
+      switch (u_in->opcode) {
+
+         case NOP: case CALLM_S: case CALLM_E:
+            break;
+
+         /* For LOAD, address is in val1 */
+         case LOAD:
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, (UInt)u_in->size);
+
+            vg_assert(1 == u_in->size || 2 == u_in->size || 4 == u_in->size || 
+                      8 == u_in->size || 10 == u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val1, TempReg, t_size);
+            // SSS: make regparms(2) eventually...
+            uCCall(cb, (Addr) & eraser_mem_read, 2, 0, False);
+            VG_(copyUInstr)(cb, u_in);
+            t_size = INVALID_TEMPREG;
+            break;
+
+         /* For others, address is in val2 */
+         case STORE:  case FPU_R:  case FPU_W:
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, (UInt)u_in->size);
+
+            vg_assert(1 == u_in->size || 2 == u_in->size || 4 == u_in->size || 
+                      8 == u_in->size || 10 == u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val2, TempReg, t_size);
+            uCCall(cb, (Addr) & eraser_mem_write, 2, 0, False);
+            VG_(copyUInstr)(cb, u_in);
+            t_size = INVALID_TEMPREG;
+            break;
+
+         default:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Error and suppression handling                               ---*/
+/*--------------------------------------------------------------------*/
+
+typedef
+   enum {
+      /* Possible data race */
+      EraserSupp
+   }
+   EraserSuppKind;
+
+/* What kind of error it is. */
+typedef
+   enum { 
+      EraserErr 
+   }
+   EraserErrorKind;
+
+
+static void record_eraser_error ( ThreadId tid, Addr a, Bool is_write )
+{
+   VG_(maybe_record_error)( VG_(get_ThreadState)(tid), EraserErr, a, 
+                            (is_write ? "writing" : "reading"),
+                            /*extra*/NULL);
+}
+
+
+Bool SK_(eq_SkinError) ( VgRes not_used,
+                          SkinError* e1, SkinError* e2 )
+{
+   vg_assert(EraserErr == e1->ekind && EraserErr == e2->ekind);
+   if (e1->string != e2->string) return False;
+   if (0 != VG_(strcmp)(e1->string, e2->string)) return False;
+   return True;
+}
+
+
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   vg_assert(EraserErr == err->ekind);
+   VG_(message)(Vg_UserMsg, "Possible data race %s variable at 0x%x",
+                err->string, err->addr );
+   pp_ExeContext();
+}
+
+
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   /* do nothing -- extra field not used, and no need to update */
+}
+
+
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if (0 == VG_(strcmp)(name, "Eraser")) {
+      *skind = EraserSupp;
+      return True;
+   } else {
+      return False;
+   }
+}
+
+
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, 
+                                        Int nBuf, SkinSupp* s )
+{
+   /* do nothing -- no extra suppression info present.  Return True to
+      indicate nothing bad happened. */
+   return True;
+}
+
+
+Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   vg_assert( su->skind == EraserSupp);
+   vg_assert(err->ekind == EraserErr);
+   return True;
+}
+
+
+// SSS: copying mutex's pointer... is that ok?  Could they get deallocated?
+// (does that make sense, deallocating a mutex?)
+static void eraser_post_mutex_lock(ThreadId tid, void* void_mutex)
+{
+   Int i = 1;
+   LockSet*  new_node;
+   LockSet*  p;
+   LockSet** q;
+   pthread_mutex_t* mutex = (pthread_mutex_t*)void_mutex;
+   
+#  if DEBUG_LOCKS
+   VG_(printf)("lock  (%u, %x)\n", tid, mutex);
+#  endif
+
+   vg_assert(tid < VG_N_THREADS &&
+             thread_locks[tid] < M_LOCKSET_TABLE);
+   /* VG_(printf)("LOCK: held %d, new %p\n", thread_locks[tid], mutex); */
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_lock-IN");
+#  endif
+
+   while (True) {
+      if (i == M_LOCKSET_TABLE) 
+         VG_(panic)("lockset table full -- increase M_LOCKSET_TABLE");
+
+      /* the lockset didn't already exist */
+      if (i == n_lockset_table) {
+
+         p = lockset_table[thread_locks[tid]];
+         q = &lockset_table[i];
+
+         /* copy the thread's lockset, creating a new list */
+         while (p != NULL) {
+            new_node = VG_(malloc)(sizeof(LockSet));
+            new_node->mutex = p->mutex;
+            *q = new_node;
+            q = &((*q)->next);
+            p = p->next;
+         }
+         (*q) = NULL;
+
+         /* find spot for the new mutex in the new list */
+         p = lockset_table[i];
+         q = &lockset_table[i];
+         while (NULL != p && mutex > p->mutex) {
+            p = p->next;
+            q = &((*q)->next);
+         }
+
+         /* insert new mutex in new list */
+         new_node = VG_(malloc)(sizeof(LockSet));
+         new_node->mutex = mutex;
+         new_node->next = p;
+         (*q) = new_node;
+
+         p = lockset_table[i];
+         vg_assert(i == n_lockset_table);
+         n_lockset_table++;
+
+#        if DEBUG_NEW_LOCKSETS
+         VG_(printf)("new lockset vector (%d): ", i);
+         print_LockSet(p);
+#        endif
+         
+         goto done;
+
+      } else {
+         /* If this succeeds, the required vector (with the new mutex added)
+          * already exists in the table at position i.  Otherwise, keep
+          * looking. */
+         if (weird_LockSet_equals(lockset_table[thread_locks[tid]],
+                                  lockset_table[i], mutex)) {
+            goto done;
+         }
+      }
+      /* if we get to here, table lockset didn't match the new thread
+       * lockset, so keep looking */
+      i++;
+   }
+
+  done:
+   /* Update the thread's lock vector */
+   thread_locks[tid] = i;
+#  if DEBUG_LOCKS
+   VG_(printf)("tid %u now has lockset %d\n", tid, i);
+#  endif
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_lock-OUT");
+#  endif
+
+}
+
+
+static void eraser_post_mutex_unlock(ThreadId tid, void* void_mutex)
+{
+   Int i = 0;
+   pthread_mutex_t* mutex = (pthread_mutex_t*)void_mutex;
+   
+#  if DEBUG_LOCKS
+   VG_(printf)("unlock(%u, %x)\n", tid, mutex);
+#  endif
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_unlock-IN");
+#  endif
+
+   // find the lockset that is the current one minus tid, change thread to use
+   // that index.
+   
+   while (True) {
+
+      if (i == n_lockset_table) {
+         /* We can't find a suitable pre-made set, so we'll have to
+            make one. */
+         i = remove ( thread_locks[tid], mutex );
+         break;
+      }
+
+      /* Args are in opposite order to call above, for reverse effect */
+      if (weird_LockSet_equals( lockset_table[i],
+                                lockset_table[thread_locks[tid]], mutex) ) {
+         /* found existing diminished set -- the best outcome. */
+         break;
+      }
+
+      i++;
+   }
+
+   /* Update the thread's lock vector */
+#  if DEBUG_LOCKS
+   VG_(printf)("tid %u reverts from %d to lockset %d\n", 
+               tid, thread_locks[tid], i);
+#  endif
+
+   thread_locks[tid] = i;
+
+#  if LOCKSET_SANITY > 1
+   sanity_check_locksets("eraser_post_mutex_unlock-OUT");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Checking memory reads and writes
+   ------------------------------------------------------------------ */
+
+/* Behaviour on reads and writes:
+ *
+ *                      VIR          EXCL        SHAR        SH_MOD
+ * ----------------------------------------------------------------
+ * rd/wr, 1st thread |  -            EXCL        -           -
+ * rd, new thread    |  -            SHAR        -           -
+ * wr, new thread    |  -            SH_MOD      -           -
+ * rd                |  error!       -           SHAR        SH_MOD
+ * wr                |  EXCL         -           SH_MOD      SH_MOD
+ * ----------------------------------------------------------------
+ */
+
+#if 0
+static 
+void dump_around_a(Addr a)
+{
+   UInt i;
+   shadow_word* sword;
+   VG_(printf)("NEARBY:\n");
+   for (i = a - 12; i <= a + 12; i += 4) {
+      sword = get_sword_addr(i); 
+      VG_(printf)("    %x -- tid: %u, state: %u\n", i, sword->other, sword->state);
+   }
+}
+#endif
+
+/* Find which word the first and last bytes are in (by shifting out bottom 2
+ * bits) then find the difference. */
+static __inline__ 
+Int compute_num_words_accessed(Addr a, UInt size) 
+{
+   Int x, y, n_words;
+   x =  a             >> 2;
+   y = (a + size - 1) >> 2;
+   n_words = y - x + 1;
+   return n_words;
+}
+
+
+#if DEBUG_ACCESSES
+   #define DEBUG_STATE(args...)   \
+      VG_(printf)("(%u) ", size), \
+      VG_(printf)(args)
+#else
+   #define DEBUG_STATE(args...)
+#endif
+
+
+static void eraser_mem_read(Addr a, UInt size)
+{
+   shadow_word* sword;
+   ThreadId tid = VG_(get_current_tid_1_if_root)();
+   Addr     end = a + 4*compute_num_words_accessed(a, size);
+
+   for ( ; a < end; a += 4) {
+
+      sword = get_sword_addr(a);
+      if (sword == SEC_MAP_ACCESS) {
+         VG_(printf)("read distinguished 2ndary map! 0x%x\n", a);
+         continue;
+      }
+
+      switch (sword->state) {
+
+      /* This looks like reading of unitialised memory, may be legit.  Eg. 
+       * calloc() zeroes its values, so untouched memory may actually be 
+       * initialised.   Leave that stuff to Valgrind.  */
+      case Vge_Virgin:
+         if (TID_INDICATING_NONVIRGIN == sword->other) {
+            DEBUG_STATE("Read  VIRGIN --> EXCL:   %8x, %u\n", a, tid);
+#           if DEBUG_VIRGIN_READS
+            dump_around_a(a);
+#           endif
+         } else {
+            DEBUG_STATE("Read  SPECIAL --> EXCL:  %8x, %u\n", a, tid);
+         }
+         sword->state = Vge_Excl;
+         sword->other = tid;       /* remember exclusive owner */
+         break;
+
+      case Vge_Excl:
+         if (tid == sword->other) {
+            DEBUG_STATE("Read  EXCL:              %8x, %u\n", a, tid);
+
+         } else {
+            DEBUG_STATE("Read  EXCL(%u) --> SHAR:  %8x, %u\n", sword->other, a, tid);
+            sword->state = Vge_Shar;
+            sword->other = thread_locks[tid];
+#           if DEBUG_MEM_LOCKSET_CHANGES
+            print_LockSet(lockset_table[sword->other]);
+#           endif
+         }
+         break;
+
+      case Vge_Shar:
+         DEBUG_STATE("Read  SHAR:              %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         break;
+
+      case Vge_SharMod:
+         DEBUG_STATE("Read  SHAR_MOD:          %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+
+         if (lockset_table[sword->other] == NULL) {
+            record_eraser_error(tid, a, False /* !is_write */);
+            n_eraser_warnings++;
+         }
+         break;
+
+      default:
+         VG_(panic)("Unknown eraser state");
+      }
+   }
+}
+
+
+static void eraser_mem_write(Addr a, UInt size)
+{
+   shadow_word* sword;
+   ThreadId tid = VG_(get_current_tid_1_if_root)();
+   Addr     end = a + 4*compute_num_words_accessed(a, size);
+
+   for ( ; a < end; a += 4) {
+
+      sword = get_sword_addr(a);
+      if (sword == SEC_MAP_ACCESS) {
+         VG_(printf)("read distinguished 2ndary map! 0x%x\n", a);
+         continue;
+      }
+
+      switch (sword->state) {
+      case Vge_Virgin:
+         if (TID_INDICATING_NONVIRGIN == sword->other)
+            DEBUG_STATE("Write VIRGIN --> EXCL:   %8x, %u\n", a, tid);
+         else
+            DEBUG_STATE("Write SPECIAL --> EXCL:  %8x, %u\n", a, tid);
+         sword->state = Vge_Excl;
+         sword->other = tid;       /* remember exclusive owner */
+         break;
+
+      case Vge_Excl:
+         if (tid == sword->other) {
+            DEBUG_STATE("Write EXCL:              %8x, %u\n", a, tid);
+            break;
+
+         } else {
+            DEBUG_STATE("Write EXCL(%u) --> SHAR_MOD: %8x, %u\n", sword->other, a, tid);
+            sword->state = Vge_SharMod;
+            sword->other = thread_locks[tid];
+#           if DEBUG_MEM_LOCKSET_CHANGES
+            print_LockSet(lockset_table[sword->other]);
+#           endif
+            goto SHARED_MODIFIED;
+         }
+
+      case Vge_Shar:
+         DEBUG_STATE("Write SHAR --> SHAR_MOD: %8x, %u\n", a, tid);
+         sword->state = Vge_SharMod;
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         goto SHARED_MODIFIED;
+
+      case Vge_SharMod:
+         DEBUG_STATE("Write SHAR_MOD:          %8x, %u\n", a, tid);
+         sword->other = intersect(sword->other, thread_locks[tid]);
+         SHARED_MODIFIED:
+         if (lockset_table[sword->other] == NULL) {
+            record_eraser_error(tid, a, True /* is_write */);
+            n_eraser_warnings++;
+         }
+         break;
+
+      default:
+         VG_(panic)("Unknown eraser state");
+      }
+   }
+}
+
+#undef DEBUG_STATE
+
+
+/*--------------------------------------------------------------------*/
+/*--- Setup                                                        ---*/
+/*--------------------------------------------------------------------*/
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   Int i;
+
+   needs->name                    = "helgrind";
+   needs->description             = "a data race detector";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+
+   VG_(register_compact_helper)((Addr) & eraser_mem_read);
+   VG_(register_compact_helper)((Addr) & eraser_mem_write);
+
+   /* Events to track */
+   track->new_mem_startup       = & eraser_new_mem_startup;
+   track->new_mem_heap          = & eraser_new_mem_heap;
+   track->new_mem_stack         = & make_writable;
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & make_writable;
+   track->new_mem_brk           = & make_writable;
+   track->new_mem_mmap          = & eraser_set_perms;
+
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->change_mem_mprotect   = & eraser_set_perms;
+
+   track->ban_mem_heap          = NULL;
+   track->ban_mem_stack         = NULL;
+
+   track->die_mem_heap          = NULL;
+   track->die_mem_stack         = NULL;
+   track->die_mem_stack_aligned = NULL;
+   track->die_mem_stack_signal  = NULL;
+   track->die_mem_brk           = NULL;
+   track->die_mem_munmap        = NULL;
+
+   track->pre_mem_read          = & eraser_pre_mem_read;
+   track->pre_mem_read_asciiz   = & eraser_pre_mem_read_asciiz;
+   track->pre_mem_write         = & eraser_pre_mem_write;
+   track->post_mem_write        = NULL;
+
+   track->post_mutex_lock       = & eraser_post_mutex_lock;
+   track->post_mutex_unlock     = & eraser_post_mutex_unlock;
+
+   /* Init lock table */
+   for (i = 0; i < VG_N_THREADS; i++) 
+      thread_locks[i] = 0 /* the empty lock set */;
+
+   lockset_table[0] = NULL;
+   for (i = 1; i < M_LOCKSET_TABLE; i++) 
+      lockset_table[i] = NULL;
+
+   init_shadow_memory();
+}
+
+
+void SK_(post_clo_init)(void)
+{
+}
+
+
+void SK_(fini)(void)
+{
+#  if DEBUG_LOCK_TABLE
+   pp_all_LockSets();
+#  endif
+#  if LOCKSET_SANITY 
+   sanity_check_locksets("SK_(fini)");
+#  endif
+   VG_(message)(Vg_UserMsg, "%u possible data races found", n_eraser_warnings);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              vg_eraser.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_errcontext.c b/vg_errcontext.c
index 46838b6..f38ade6 100644
--- a/vg_errcontext.c
+++ b/vg_errcontext.c
@@ -25,147 +25,22 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /*------------------------------------------------------------*/
-/*--- Defns                                                ---*/
+/*--- Globals                                              ---*/
 /*------------------------------------------------------------*/
 
-/* Suppression is a type describing an error which we want to
-   suppress, ie, not show the user, usually because it is caused by a
-   problem in a library which we can't fix, replace or work around.
-   Suppressions are read from a file at startup time, specified by
-   vg_clo_suppressions, and placed in the vg_suppressions list.  This
-   gives flexibility so that new suppressions can be added to the file
-   as and when needed. 
-*/
-typedef 
-   enum { 
-      /* Bad syscall params */
-      Param, 
-      /* Use of invalid values of given size */
-      Value0, Value1, Value2, Value4, Value8, 
-      /* Invalid read/write attempt at given size */
-      Addr1, Addr2, Addr4, Addr8,
-      /* Invalid or mismatching free */
-      FreeS,
-      /* Pthreading error */
-      PThread
-   } 
-   SuppressionKind;
-
-
-/* For each caller specified for a suppression, record the nature of
-   the caller name. */
-typedef
-   enum { 
-      /* Name is of an shared object file. */
-      ObjName,
-      /* Name is of a function. */
-      FunName 
-   }
-   SuppressionLocTy;
-
-
-/* A complete suppression record. */
-typedef
-   struct _Suppression {
-      struct _Suppression* next;
-      /* The number of times this error has been suppressed. */
-      Int count;
-      /* The name by which the suppression is referred to. */
-      Char* sname;
-      /* What kind of suppression. */
-      SuppressionKind skind;
-      /* Name of syscall param if skind==Param */
-      Char* param;
-      /* Name of fn where err occurs, and immediate caller (mandatory). */
-      SuppressionLocTy caller0_ty;
-      Char*            caller0;
-      SuppressionLocTy caller1_ty;
-      Char*            caller1;
-      /* Optional extra callers. */
-      SuppressionLocTy caller2_ty;
-      Char*            caller2;
-      SuppressionLocTy caller3_ty;
-      Char*            caller3;
-   } 
-   Suppression;
-
-
-/* ErrContext is a type for recording just enough info to generate an
-   error report for an illegal memory access.  The idea is that
-   (typically) the same few points in the program generate thousands
-   of illegal accesses, and we don't want to spew out a fresh error
-   message for each one.  Instead, we use these structures to common
-   up duplicates.  
-*/
-
-/* What kind of error it is. */
-typedef 
-   enum { ValueErr, AddrErr, 
-          ParamErr, UserErr, /* behaves like an anonymous ParamErr */
-          FreeErr, FreeMismatchErr,
-          PThreadErr /* pthread API error */
-   }
-   ErrKind;
-
-/* What kind of memory access is involved in the error? */
-typedef
-   enum { ReadAxs, WriteAxs, ExecAxs }
-   AxsKind;
-
-/* Top-level struct for recording errors. */
-typedef
-   struct _ErrContext {
-      /* ALL */
-      struct _ErrContext* next;
-      /* ALL */
-      /* NULL if unsuppressed; or ptr to suppression record. */
-      Suppression* supp;
-      /* ALL */
-      Int count;
-      /* ALL */
-      ErrKind ekind;
-      /* ALL */
-      ExeContext* where;
-      /* Addr */
-      AxsKind axskind;
-      /* Addr, Value */
-      Int size;
-      /* Addr, Free, Param, User */
-      Addr addr;
-      /* Addr, Free, Param, User */
-      AddrInfo addrinfo;
-      /* Param; hijacked for PThread as a description */
-      Char* syscall_param;
-      /* Param, User */
-      Bool isWriteableLack;
-      /* ALL */
-      ThreadId tid;
-      /* ALL */
-      /* These record %EIP, %ESP and %EBP at the error point.  They
-         are only used to make GDB-attaching convenient; there is no
-         other purpose; specifically they are not used to do
-         comparisons between errors. */
-      UInt m_eip;
-      UInt m_esp;
-      UInt m_ebp;
-   } 
-   ErrContext;
-
 /* The list of error contexts found, both suppressed and unsuppressed.
    Initially empty, and grows as errors are detected. */
-static ErrContext* vg_err_contexts = NULL;
+static CoreError* vg_errors = NULL;
 
 /* The list of suppression directives, as read from the specified
    suppressions file. */
-static Suppression* vg_suppressions = NULL;
+static CoreSupp* vg_suppressions = NULL;
 
 /* Running count of unsuppressed errors detected. */
 static UInt vg_n_errs_found = 0;
@@ -173,265 +48,76 @@
 /* Running count of suppressed errors detected. */
 static UInt vg_n_errs_suppressed = 0;
 
-/* Used to disable further error reporting once some huge number of
-   errors have already been logged. */
-static Bool vg_ignore_errors = False;
-
 /* forwards ... */
-static Suppression* is_suppressible_error ( ErrContext* ec );
+static CoreSupp* is_suppressible_error ( CoreError* err );
 
 
 /*------------------------------------------------------------*/
 /*--- Helper fns                                           ---*/
 /*------------------------------------------------------------*/
 
-
-static void clear_AddrInfo ( AddrInfo* ai )
-{
-   ai->akind      = Unknown;
-   ai->blksize    = 0;
-   ai->rwoffset   = 0;
-   ai->lastchange = NULL;
-   ai->stack_tid  = VG_INVALID_THREADID;
-   ai->maybe_gcc  = False;
-}
-
-static void clear_ErrContext ( ErrContext* ec )
-{
-   ec->next    = NULL;
-   ec->supp    = NULL;
-   ec->count   = 0;
-   ec->ekind   = ValueErr;
-   ec->where   = NULL;
-   ec->axskind = ReadAxs;
-   ec->size    = 0;
-   ec->addr    = 0;
-   clear_AddrInfo ( &ec->addrinfo );
-   ec->syscall_param   = NULL;
-   ec->isWriteableLack = False;
-   ec->m_eip   = 0xDEADB00F;
-   ec->m_esp   = 0xDEADBE0F;
-   ec->m_ebp   = 0xDEADB0EF;
-   ec->tid     = VG_INVALID_THREADID;
-}
-
-
-static __inline__
-Bool vg_eq_ExeContext ( Bool top_2_only,
-                        ExeContext* e1, ExeContext* e2 )
-{
-   /* Note that frames after the 4th are always ignored. */
-   if (top_2_only) {
-      return VG_(eq_ExeContext_top2(e1, e2));
-   } else {
-      return VG_(eq_ExeContext_top4(e1, e2));
-   }
-}
-
-
-static Bool eq_AddrInfo ( Bool cheap_addr_cmp,
-                          AddrInfo* ai1, AddrInfo* ai2 )
-{
-   if (ai1->akind != Undescribed 
-       && ai2->akind != Undescribed
-       && ai1->akind != ai2->akind) 
-      return False;
-   if (ai1->akind == Freed || ai1->akind == Mallocd) {
-      if (ai1->blksize != ai2->blksize)
-         return False;
-      if (!vg_eq_ExeContext(cheap_addr_cmp, 
-                            ai1->lastchange, ai2->lastchange))
-         return False;
-   }
-   return True;
-}
-
 /* Compare error contexts, to detect duplicates.  Note that if they
    are otherwise the same, the faulting addrs and associated rwoffsets
    are allowed to be different.  */
-
-static Bool eq_ErrContext ( Bool cheap_addr_cmp,
-                            ErrContext* e1, ErrContext* e2 )
+static Bool eq_CoreError ( VgRes res, CoreError* e1, CoreError* e2 )
 {
-   if (e1->ekind != e2->ekind) 
+   if (e1->skin_err.ekind != e2->skin_err.ekind) 
       return False;
-   if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where))
+   if (!VG_(eq_ExeContext)(res, e1->where, e2->where))
       return False;
 
-   switch (e1->ekind) {
+   switch (e1->skin_err.ekind) {
       case PThreadErr:
-         if (e1->syscall_param == e2->syscall_param) 
+         vg_assert(VG_(needs).core_errors);
+         if (e1->skin_err.string == e2->skin_err.string) 
             return True;
-         if (0 == VG_(strcmp)(e1->syscall_param, e2->syscall_param))
+         if (0 == VG_(strcmp)(e1->skin_err.string, e2->skin_err.string))
             return True;
          return False;
-      case UserErr:
-      case ParamErr:
-         if (e1->isWriteableLack != e2->isWriteableLack) return False;
-         if (e1->ekind == ParamErr 
-             && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param))
-            return False;
-         return True;
-      case FreeErr:
-      case FreeMismatchErr:
-         if (e1->addr != e2->addr) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case AddrErr:
-         if (e1->axskind != e2->axskind) return False;
-         if (e1->size != e2->size) return False;
-         if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) 
-            return False;
-         return True;
-      case ValueErr:
-         if (e1->size != e2->size) return False;
-         return True;
       default: 
-         VG_(panic)("eq_ErrContext");
+         if (VG_(needs).skin_errors)
+            return SK_(eq_SkinError)(res, &e1->skin_err, &e2->skin_err);
+         else {
+            VG_(printf)("\nUnhandled error type: %u. VG_(needs).skin_errors\n"
+                        "probably needs to be set.\n",
+                        e1->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-static void pp_AddrInfo ( Addr a, AddrInfo* ai )
+static void pp_CoreError ( CoreError* err, Bool printCount )
 {
-   switch (ai->akind) {
-      case Stack: 
-         VG_(message)(Vg_UserMsg, 
-                      "   Address 0x%x is on thread %d's stack", 
-                      a, ai->stack_tid);
-         break;
-      case Unknown:
-         if (ai->maybe_gcc) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
-               a);
-            VG_(message)(Vg_UserMsg, 
-               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
-         }
-         break;
-      case Freed: case Mallocd: case UserG: case UserS: {
-         UInt delta;
-         UChar* relative;
-         if (ai->rwoffset < 0) {
-            delta    = (UInt)(- ai->rwoffset);
-            relative = "before";
-         } else if (ai->rwoffset >= ai->blksize) {
-            delta    = ai->rwoffset - ai->blksize;
-            relative = "after";
-         } else {
-            delta    = ai->rwoffset;
-            relative = "inside";
-         }
-         if (ai->akind == UserS) {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a %d-byte stack red-zone created",
-               a, delta, relative, 
-               ai->blksize );
-	 } else {
-            VG_(message)(Vg_UserMsg, 
-               "   Address 0x%x is %d bytes %s a block of size %d %s",
-               a, delta, relative, 
-               ai->blksize,
-               ai->akind==Mallocd ? "alloc'd" 
-                  : ai->akind==Freed ? "free'd" 
-                                     : "client-defined");
-         }
-         VG_(pp_ExeContext)(ai->lastchange);
-         break;
-      }
-      default:
-         VG_(panic)("pp_AddrInfo");
+   /* Closure for printing where the error occurred.  Abstracts details
+      about the `where' field away from the skin. */
+   void pp_ExeContextClosure(void)
+   {
+      VG_(pp_ExeContext) ( err->where );
    }
-}
-
-static void pp_ErrContext ( ErrContext* ec, Bool printCount )
-{
+   
    if (printCount)
-      VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count );
-   if (ec->tid > 1)
-      VG_(message)(Vg_UserMsg, "Thread %d:", ec->tid );
-   switch (ec->ekind) {
-      case ValueErr:
-         if (ec->size == 0) {
-             VG_(message)(
-                Vg_UserMsg,
-                "Conditional jump or move depends on uninitialised value(s)");
-         } else {
-             VG_(message)(Vg_UserMsg,
-                          "Use of uninitialised value of size %d",
-                          ec->size);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         break;
-      case AddrErr:
-         switch (ec->axskind) {
-            case ReadAxs:
-               VG_(message)(Vg_UserMsg, "Invalid read of size %d", 
-                                        ec->size ); 
-               break;
-            case WriteAxs:
-               VG_(message)(Vg_UserMsg, "Invalid write of size %d", 
-                                        ec->size ); 
-               break;
-            case ExecAxs:
-               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
-                                        "stated on the next line");
-               break;
-            default: 
-               VG_(panic)("pp_ErrContext(axskind)");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case FreeErr:
-         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
-         /* fall through */
-      case FreeMismatchErr:
-         if (ec->ekind == FreeMismatchErr)
-            VG_(message)(Vg_UserMsg, 
-                         "Mismatched free() / delete / delete []");
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case ParamErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Syscall param %s contains unaddressable byte(s)",
-                ec->syscall_param );
-         } else {
-            VG_(message)(Vg_UserMsg, 
-                "Syscall param %s contains uninitialised or "
-                "unaddressable byte(s)",
-            ec->syscall_param);
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
-      case UserErr:
-         if (ec->isWriteableLack) {
-            VG_(message)(Vg_UserMsg, 
-               "Unaddressable byte(s) found during client check request");
-         } else {
-            VG_(message)(Vg_UserMsg, 
-               "Uninitialised or "
-               "unaddressable byte(s) found during client check request");
-         }
-         VG_(pp_ExeContext)(ec->where);
-         pp_AddrInfo(ec->addr, &ec->addrinfo);
-         break;
+      VG_(message)(Vg_UserMsg, "Observed %d times:", err->count );
+   if (err->tid > 1)
+      VG_(message)(Vg_UserMsg, "Thread %d:", err->tid );
+
+   switch (err->skin_err.ekind) {
       case PThreadErr:
-         VG_(message)(Vg_UserMsg, "%s", ec->syscall_param );
-         VG_(pp_ExeContext)(ec->where);
+         vg_assert(VG_(needs).core_errors);
+         VG_(message)(Vg_UserMsg, "%s", err->skin_err.string );
+         VG_(pp_ExeContext)(err->where);
          break;
       default: 
-         VG_(panic)("pp_ErrContext");
+         if (VG_(needs).skin_errors)
+            SK_(pp_SkinError)( &err->skin_err, &pp_ExeContextClosure );
+         else {
+            VG_(printf)("\nUnhandled error type: %u.  VG_(needs).skin_errors\n"
+                        "probably needs to be set?\n",
+                        err->skin_err.ekind);
+            VG_(skin_error)("unhandled error type");
+         }
    }
 }
 
-
 /* Figure out if we want to attach for GDB for this error, possibly
    by asking the user. */
 static
@@ -476,21 +162,69 @@
 }
 
 
-/* Top-level entry point to the error management subsystem.  All
-   detected errors are notified here; this routine decides if/when the
-   user should see the error. */
-static void VG_(maybe_add_context) ( ErrContext* ec )
+/* I've gone all object-oriented... initialisation depends on where the
+   error comes from:
+
+   - If from generated code (tst == NULL), the %EIP/%EBP values that we
+     need in order to create proper error messages are picked up out of
+     VG_(baseBlock) rather than from the thread table (vg_threads in
+     vg_scheduler.c).
+
+   - If not from generated code but in response to requests passed back to
+     the scheduler (tst != NULL), we pick up %EIP/%EBP values from the
+     stored thread state, not from VG_(baseBlock).  
+*/
+static __inline__
+void construct_error ( CoreError* err, ThreadState* tst, 
+                       ErrorKind ekind, Addr a, Char* s, void* extra )
 {
-   ErrContext* p;
-   ErrContext* p_prev;
-   Bool        cheap_addr_cmp         = False;
+   /* CoreError parts */
+   err->next     = NULL;
+   err->supp     = NULL;
+   err->count    = 1;
+   if (NULL == tst) {
+      err->tid   = VG_(get_current_tid)();
+      err->where = 
+         VG_(get_ExeContext2)( VG_(baseBlock)[VGOFF_(m_eip)], 
+                               VG_(baseBlock)[VGOFF_(m_ebp)],
+                               VG_(baseBlock)[VGOFF_(m_esp)],
+                               VG_(threads)[err->tid].stack_highest_word);
+      err->m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
+      err->m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+      err->m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
+   } else {
+      err->where = VG_(get_ExeContext) ( tst );
+      err->tid   = tst->tid;
+      err->m_eip = tst->m_eip;
+      err->m_esp = tst->m_esp;
+      err->m_ebp = tst->m_ebp;
+   }
+
+   /* SkinError parts */
+   err->skin_err.ekind  = ekind;
+   err->skin_err.addr   = a;
+   err->skin_err.string = s;
+   err->skin_err.extra  = extra;
+
+   /* sanity... */
+   vg_assert(err->tid >= 0 && err->tid < VG_N_THREADS);
+}
+
+/* Top-level entry point to the error management subsystem.
+   All detected errors are notified here; this routine decides if/when the
+   user should see the error. */
+void VG_(maybe_record_error) ( ThreadState* tst, 
+                               ErrorKind ekind, Addr a, Char* s, void* extra )
+{
+   CoreError   err;
+   CoreError*  p;
+   CoreError*  p_prev;
+   VgRes       exe_res                = Vg_MedRes;
    static Bool is_first_shown_context = True;
    static Bool stopping_message       = False;
    static Bool slowdown_message       = False;
    static Int  vg_n_errs_shown        = 0;
 
-   vg_assert(ec->tid >= 0 && ec->tid < VG_N_THREADS);
-
    /* After M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN different errors have
       been found, or M_VG_COLLECT_NO_ERRORS_AFTER_FOUND total errors
       have been found, just refuse to collect any more.  This stops
@@ -520,12 +254,11 @@
          VG_(message)(Vg_UserMsg, 
             "Rerun with --error-limit=no to disable this cutoff.  Note");
          VG_(message)(Vg_UserMsg, 
-            "that your program may now segfault without prior warning from");
+            "that errors may occur in your program without prior warning from");
          VG_(message)(Vg_UserMsg, 
             "Valgrind, because errors are no longer being displayed.");
          VG_(message)(Vg_UserMsg, "");
          stopping_message = True;
-         vg_ignore_errors = True;
       }
       return;
    }
@@ -534,7 +267,7 @@
       been found, be much more conservative about collecting new
       ones. */
    if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) {
-      cheap_addr_cmp = True;
+      exe_res = Vg_LowRes;
       if (!slowdown_message) {
          VG_(message)(Vg_UserMsg, "");
          VG_(message)(Vg_UserMsg, 
@@ -546,12 +279,14 @@
       }
    }
 
+   /* Build ourselves the error */
+   construct_error ( &err, tst, ekind, a, s, extra );
 
    /* First, see if we've got an error record matching this one. */
-   p      = vg_err_contexts;
+   p      = vg_errors;
    p_prev = NULL;
    while (p != NULL) {
-      if (eq_ErrContext(cheap_addr_cmp, p, ec)) {
+      if (eq_CoreError(exe_res, p, &err)) {
          /* Found it. */
          p->count++;
 	 if (p->supp != NULL) {
@@ -567,8 +302,8 @@
          if (p_prev != NULL) {
             vg_assert(p_prev->next == p);
             p_prev->next    = p->next;
-            p->next         = vg_err_contexts;
-            vg_err_contexts = p;
+            p->next         = vg_errors;
+            vg_errors = p;
 	 }
          return;
       }
@@ -578,27 +313,37 @@
 
    /* Didn't see it.  Copy and add. */
 
-   /* OK, we're really going to collect it.  First, describe any addr
-      info in the error. */
-   if (ec->addrinfo.akind == Undescribed)
-      VG_(describe_addr) ( ec->addr, &ec->addrinfo );
+   /* OK, we're really going to collect it.  First make a copy,
+      because the error context is on the stack and will disappear shortly.
+      We can duplicate the main part ourselves, but use
+      SK_(dup_extra_and_update) to duplicate the 'extra' part (unless it's
+      NULL).
+     
+      SK_(dup_extra_and_update) can also update the SkinError.  This is
+      for when there are more details to fill in which take time to work out
+      but don't affect our earlier decision to include the error -- by
+      postponing those details until now, we avoid the extra work in the
+      case where we ignore the error.
+    */
+   p = VG_(arena_malloc)(VG_AR_ERRORS, sizeof(CoreError));
+   *p = err;
+   if (NULL != err.skin_err.extra)
+      SK_(dup_extra_and_update)(&p->skin_err);
 
-   p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext));
-   *p = *ec;
-   p->next = vg_err_contexts;
-   p->supp = is_suppressible_error(ec);
-   vg_err_contexts = p;
+   p->next = vg_errors;
+   p->supp = is_suppressible_error(&err);
+   vg_errors = p;
    if (p->supp == NULL) {
       vg_n_errs_found++;
       if (!is_first_shown_context)
          VG_(message)(Vg_UserMsg, "");
-      pp_ErrContext(p, False);      
+      pp_CoreError(p, False);      
       is_first_shown_context = False;
       vg_n_errs_shown++;
       /* Perhaps we want a GDB attach at this point? */
       if (vg_is_GDB_attach_requested()) {
          VG_(swizzle_esp_then_start_GDB)(
-            ec->m_eip, ec->m_esp, ec->m_ebp);
+            err.m_eip, err.m_esp, err.m_ebp);
       }
    } else {
       vg_n_errs_suppressed++;
@@ -607,202 +352,34 @@
 }
 
 
-
-
 /*------------------------------------------------------------*/
 /*--- Exported fns                                         ---*/
 /*------------------------------------------------------------*/
 
-/* These two are called from generated code, so that the %EIP/%EBP
-   values that we need in order to create proper error messages are
-   picked up out of VG_(baseBlock) rather than from the thread table
-   (vg_threads in vg_scheduler.c). */
+/* These are called not from generated code but from the scheduler */
 
-void VG_(record_value_error) ( Int size )
+void VG_(record_pthread_error) ( ThreadId tid, Char* msg )
 {
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count = 1;
-   ec.next  = NULL;
-   ec.where = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                          VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind = ValueErr;
-   ec.size  = size;
-   ec.tid   = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   VG_(maybe_add_context) ( &ec );
+   if (! VG_(needs).core_errors) return;
+   VG_(maybe_record_error)( &VG_(threads)[tid], PThreadErr, /*addr*/0, msg, 
+                            /*extra*/NULL );
 }
 
-void VG_(record_address_error) ( Addr a, Int size, Bool isWrite )
-{
-   ErrContext ec;
-   Bool       just_below_esp;
-   if (vg_ignore_errors) return;
-
-   just_below_esp 
-      = VG_(is_just_below_ESP)( VG_(baseBlock)[VGOFF_(m_esp)], a );
-
-   /* If this is caused by an access immediately below %ESP, and the
-      user asks nicely, we just ignore it. */
-   if (VG_(clo_workaround_gcc296_bugs) && just_below_esp)
-      return;
-
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], 
-                                            VG_(baseBlock)[VGOFF_(m_ebp)] );
-   ec.ekind   = AddrErr;
-   ec.axskind = isWrite ? WriteAxs : ReadAxs;
-   ec.size    = size;
-   ec.addr    = a;
-   ec.tid     = VG_(get_current_tid)();
-   ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)];
-   ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)];
-   ec.addrinfo.akind     = Undescribed;
-   ec.addrinfo.maybe_gcc = just_below_esp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
-/* These five are called not from generated code but in response to
-   requests passed back to the scheduler.  So we pick up %EIP/%EBP
-   values from the stored thread state, not from VG_(baseBlock).  */
-
-void VG_(record_free_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_freemismatch_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = FreeMismatchErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_jump_error) ( ThreadState* tst, Addr a )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = AddrErr;
-   ec.axskind = ExecAxs;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_param_err) ( ThreadState* tst, Addr a, Bool isWriteLack, 
-                             Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = ParamErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.syscall_param = msg;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_user_err) ( ThreadState* tst, Addr a, Bool isWriteLack )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp );
-   ec.ekind   = UserErr;
-   ec.addr    = a;
-   ec.tid     = tst->tid;
-   ec.m_eip   = tst->m_eip;
-   ec.m_esp   = tst->m_esp;
-   ec.m_ebp   = tst->m_ebp;
-   ec.addrinfo.akind = Undescribed;
-   ec.isWriteableLack = isWriteLack;
-   VG_(maybe_add_context) ( &ec );
-}
-
-void VG_(record_pthread_err) ( ThreadId tid, Char* msg )
-{
-   ErrContext ec;
-   if (vg_ignore_errors) return;
-   if (!VG_(clo_instrument)) return;
-   clear_ErrContext( &ec );
-   ec.count   = 1;
-   ec.next    = NULL;
-   ec.where   = VG_(get_ExeContext)( False, VG_(threads)[tid].m_eip, 
-                                            VG_(threads)[tid].m_ebp );
-   ec.ekind   = PThreadErr;
-   ec.tid     = tid;
-   ec.syscall_param = msg;
-   ec.m_eip   = VG_(threads)[tid].m_eip;
-   ec.m_esp   = VG_(threads)[tid].m_esp;
-   ec.m_ebp   = VG_(threads)[tid].m_ebp;
-   VG_(maybe_add_context) ( &ec );
-}
-
-
 /*------------------------------*/
 
 void VG_(show_all_errors) ( void )
 {
-   Int         i, n_min;
-   Int         n_err_contexts, n_supp_contexts;
-   ErrContext  *p, *p_min;
-   Suppression *su;
-   Bool        any_supp;
+   Int        i, n_min;
+   Int        n_err_contexts, n_supp_contexts;
+   CoreError *p, *p_min;
+   CoreSupp   *su;
+   Bool       any_supp;
 
    if (VG_(clo_verbosity) == 0)
       return;
 
    n_err_contexts = 0;
-   for (p = vg_err_contexts; p != NULL; p = p->next) {
+   for (p = vg_errors; p != NULL; p = p->next) {
       if (p->supp == NULL)
          n_err_contexts++;
    }
@@ -826,20 +403,20 @@
    for (i = 0; i < n_err_contexts; i++) {
       n_min = (1 << 30) - 1;
       p_min = NULL;
-      for (p = vg_err_contexts; p != NULL; p = p->next) {
+      for (p = vg_errors; p != NULL; p = p->next) {
          if (p->supp != NULL) continue;
          if (p->count < n_min) {
             n_min = p->count;
             p_min = p;
          }
       }
-      if (p_min == NULL) VG_(panic)("pp_AllErrContexts");
+      if (p_min == NULL) VG_(panic)("show_all_errors()");
 
       VG_(message)(Vg_UserMsg, "");
       VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:",
                    p_min->count,
                    i+1, n_err_contexts);
-      pp_ErrContext( p_min, False );
+      pp_CoreError( p_min, False );
 
       if ((i+1 == VG_(clo_dump_error))) {
 	VG_(translate) ( 0 /* dummy ThreadId; irrelevant due to below NULLs */,
@@ -855,8 +432,7 @@
    for (su = vg_suppressions; su != NULL; su = su->next) {
       if (su->count > 0) {
          any_supp = True;
-         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, 
-                                   su->sname);
+         VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, su->sname);
       }
    }
 
@@ -883,7 +459,7 @@
 
 #define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t'))
 
-static Bool getLine ( Int fd, Char* buf, Int nBuf )
+Bool VG_(getLine) ( Int fd, Char* buf, Int nBuf )
 {
    Char ch;
    Int  n, i;
@@ -924,7 +500,7 @@
    (fun: or obj:) part.
    Returns False if failed.
 */
-static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty )
+static Bool setLocationTy ( Char** p_caller, SuppLocTy* p_ty )
 {
    if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) {
       (*p_caller) += 4;
@@ -948,107 +524,95 @@
 #define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                       && VG_(strcmp)((s1),(s2))==0)
 
-static Char* copyStr ( Char* str )
-{
-   Int   n, i;
-   Char* str2;
-   n    = VG_(strlen)(str);
-   str2 = VG_(malloc)(VG_AR_PRIVATE, n+1);
-   vg_assert(n > 0);
-   for (i = 0; i < n+1; i++) str2[i] = str[i];
-   return str2;
-}
-
 static void load_one_suppressions_file ( Char* filename )
 {
 #  define N_BUF 200
-   Int  fd;
+   Int  fd, i;
    Bool eof;
+   Bool is_unrecognised_suppressions = False;
    Char buf[N_BUF+1];
-   fd = VG_(open_read)( filename );
+   fd = VG_(open)( filename, VKI_O_RDONLY, 0 );
    if (fd == -1) {
-      VG_(message)(Vg_UserMsg, 
-                   "FATAL: can't open suppressions file `%s'", 
+      VG_(message)(Vg_UserMsg, "FATAL: can't open suppressions file `%s'", 
                    filename );
       VG_(exit)(1);
    }
 
    while (True) {
-      Suppression* supp;
-      supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression));
+      /* Assign and initialise the two suppression halves (core and skin) */
+      CoreSupp* supp;
+      supp            = VG_(arena_malloc)(VG_AR_CORE, sizeof(CoreSupp));
       supp->count = 0;
-      supp->param = supp->caller0 = supp->caller1 
-                  = supp->caller2 = supp->caller3 = NULL;
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) supp->caller[i] = NULL;
+      supp->skin_supp.string = supp->skin_supp.extra = NULL;
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof) break;
 
       if (!STREQ(buf, "{")) goto syntax_error;
       
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
       if (eof || STREQ(buf, "}")) goto syntax_error;
-      supp->sname = copyStr(buf);
+      supp->sname = VG_(arena_strdup)(VG_AR_CORE, buf);
 
-      eof = getLine ( fd, buf, N_BUF );
+      eof = VG_(getLine) ( fd, buf, N_BUF );
+
       if (eof) goto syntax_error;
-      else if (STREQ(buf, "Param"))  supp->skind = Param;
-      else if (STREQ(buf, "Value0")) supp->skind = Value0; /* backwards compat */
-      else if (STREQ(buf, "Cond"))   supp->skind = Value0;
-      else if (STREQ(buf, "Value1")) supp->skind = Value1;
-      else if (STREQ(buf, "Value2")) supp->skind = Value2;
-      else if (STREQ(buf, "Value4")) supp->skind = Value4;
-      else if (STREQ(buf, "Value8")) supp->skind = Value8;
-      else if (STREQ(buf, "Addr1"))  supp->skind = Addr1;
-      else if (STREQ(buf, "Addr2"))  supp->skind = Addr2;
-      else if (STREQ(buf, "Addr4"))  supp->skind = Addr4;
-      else if (STREQ(buf, "Addr8"))  supp->skind = Addr8;
-      else if (STREQ(buf, "Free"))   supp->skind = FreeS;
-      else if (STREQ(buf, "PThread")) supp->skind = PThread;
-      else goto syntax_error;
 
-      if (supp->skind == Param) {
-         eof = getLine ( fd, buf, N_BUF );
-         if (eof) goto syntax_error;
-         supp->param = copyStr(buf);
+      /* Is it a core suppression? */
+      else if (VG_(needs).core_errors && STREQ(buf, "PThread")) 
+         supp->skin_supp.skind = PThreadSupp;
+
+      /* Is it a skin suppression? */
+      else if (VG_(needs).skin_errors && 
+               SK_(recognised_suppression)(buf, &(supp->skin_supp.skind))) {
+         /* do nothing, function fills in supp->skin_supp.skind */
+      }
+      //else goto syntax_error;
+      else {
+         /* SSS: if we don't recognise the suppression name, ignore entire
+          * entry.  Not sure if this is a good long-term approach -- makes
+          * it impossible to spot incorrect suppression names?  (apart
+          * from the warning given) */
+         if (! is_unrecognised_suppressions) {
+            is_unrecognised_suppressions = True;
+            VG_(start_msg)(Vg_DebugMsg);
+            VG_(add_to_msg)("Ignoring unrecognised suppressions: ");
+            VG_(add_to_msg)("'%s'", buf);
+         } else {
+            VG_(add_to_msg)(", '%s'", buf);
+         }
+         while (True) {
+            eof = VG_(getLine) ( fd, buf, N_BUF );
+            if (eof) goto syntax_error;
+            if (STREQ(buf, "}"))
+               break;
+         }
+         continue;
       }
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      supp->caller0 = copyStr(buf);
-      if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty)))
+      if (VG_(needs).skin_errors && 
+          !SK_(read_extra_suppression_info)(fd, buf, N_BUF, &supp->skin_supp)) 
          goto syntax_error;
 
-      eof = getLine ( fd, buf, N_BUF );
-      if (eof) goto syntax_error;
-      if (!STREQ(buf, "}")) {
-         supp->caller1 = copyStr(buf);
-         if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty)))
-            goto syntax_error;
-      
-         eof = getLine ( fd, buf, N_BUF );
+      /* "i > 0" ensures at least one caller read. */
+      for (i = 0; i < VG_N_SUPP_CALLERS; i++) {
+         eof = VG_(getLine) ( fd, buf, N_BUF );
          if (eof) goto syntax_error;
-         if (!STREQ(buf, "}")) {
-            supp->caller2 = copyStr(buf);
-            if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty)))
-               goto syntax_error;
-
-            eof = getLine ( fd, buf, N_BUF );
-            if (eof) goto syntax_error;
-            if (!STREQ(buf, "}")) {
-               supp->caller3 = copyStr(buf);
-              if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty)))
-                 goto syntax_error;
-
-               eof = getLine ( fd, buf, N_BUF );
-               if (eof || !STREQ(buf, "}")) goto syntax_error;
-	    }
-         }
+         if (i > 0 && STREQ(buf, "}")) 
+            break;
+         supp->caller[i] = VG_(arena_strdup)(VG_AR_CORE, buf);
+         if (!setLocationTy(&(supp->caller[i]), &(supp->caller_ty[i])))
+            goto syntax_error;
       }
 
       supp->next = vg_suppressions;
       vg_suppressions = supp;
    }
-
+   if (is_unrecognised_suppressions) {
+      /* Print out warning about any ignored suppressions */
+      //VG_(end_msg)();
+   }
    VG_(close)(fd);
    return;
 
@@ -1083,148 +647,102 @@
    }
 }
 
+/* Return the name of an erring fn in a way which is useful
+   for comparing against the contents of a suppressions file. 
+   Doesn't demangle the fn name, because we want to refer to 
+   mangled names in the suppressions file.
+*/    
+static
+void get_objname_fnname ( Addr a,
+                          Char* obj_buf, Int n_obj_buf,
+                          Char* fun_buf, Int n_fun_buf )
+{     
+   (void)VG_(get_objname)          ( a, obj_buf, n_obj_buf );
+   (void)VG_(get_fnname_nodemangle)( a, fun_buf, n_fun_buf );
+}     
+
+static __inline__
+Bool supp_matches_error(CoreSupp* su, CoreError* err)
+{
+   switch (su->skin_supp.skind) {
+      case PThreadSupp:
+         return (err->skin_err.ekind == PThreadErr);
+      default:
+         if (VG_(needs).skin_errors) {
+            return (SK_(error_matches_suppression)(&err->skin_err, 
+                                                    &su->skin_supp));
+         } else {
+            VG_(printf)(
+               "\nUnhandled suppression type: %u.  VG_(needs).skin_errors\n"
+               "probably needs to be set.\n",
+               err->skin_err.ekind);
+            VG_(skin_error)("unhandled suppression type");
+         }
+   }
+}
+
+static __inline__
+Bool supp_matches_callers(CoreSupp* su, Char caller_obj[][M_VG_ERRTXT], 
+                                        Char caller_fun[][M_VG_ERRTXT])
+{
+   Int i;
+
+   for (i = 0; su->caller[i] != NULL; i++) {
+      switch (su->caller_ty[i]) {
+         case ObjName: if (VG_(stringMatch)(su->caller[i],
+                                            caller_obj[i])) break;
+                       return False;
+         case FunName: if (VG_(stringMatch)(su->caller[i], 
+                                            caller_fun[i])) break;
+                       return False;
+         default: VG_(panic)("is_suppressible_error");
+      }
+   }
+
+   /* If we reach here, it's a match */
+   return True;
+}
 
 /* Does an error context match a suppression?  ie is this a
-   suppressible error?  If so, return a pointer to the Suppression
+   suppressible error?  If so, return a pointer to the CoreSupp
    record, otherwise NULL.
-   Tries to minimise the number of calls to what_fn_is_this since they
-   are expensive.  
+   Tries to minimise the number of symbol searches since they are expensive.  
 */
-static Suppression* is_suppressible_error ( ErrContext* ec )
+static CoreSupp* is_suppressible_error ( CoreError* err )
 {
 #  define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
                         && VG_(strcmp)((s1),(s2))==0)
+   Int i;
 
-   Char caller0_obj[M_VG_ERRTXT];
-   Char caller0_fun[M_VG_ERRTXT];
-   Char caller1_obj[M_VG_ERRTXT];
-   Char caller1_fun[M_VG_ERRTXT];
-   Char caller2_obj[M_VG_ERRTXT];
-   Char caller2_fun[M_VG_ERRTXT];
-   Char caller3_obj[M_VG_ERRTXT];
-   Char caller3_fun[M_VG_ERRTXT];
+   Char caller_obj[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
+   Char caller_fun[VG_N_SUPP_CALLERS][M_VG_ERRTXT];
 
-   Suppression* su;
-   Int          su_size;
+   CoreSupp* su;
 
-   /* vg_what_fn_or_object_is_this returns:
-         <function_name>      or
-         <object_name>        or
-         ???
-      so the strings in the suppression file should match these.
+   /* get_objname_fnname() writes the function name and object name if
+      it finds them in the debug info.  so the strings in the suppression
+      file should match these.
    */
 
    /* Initialise these strs so they are always safe to compare, even
-      if what_fn_or_object_is_this doesn't write anything to them. */
-   caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0;
-   caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0;
+      if get_objname_fnname doesn't write anything to them. */
+   for (i = 0; i < VG_N_SUPP_CALLERS; i++)
+      caller_obj[i][0] = caller_fun[i][0] = 0;
 
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT,
-                            caller0_fun, M_VG_ERRTXT );
-   VG_(what_obj_and_fun_is_this)
-      ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT,
-                            caller1_fun, M_VG_ERRTXT );
-
-   if (VG_(clo_backtrace_size) > 2) {
-      VG_(what_obj_and_fun_is_this)
-         ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT,
-                               caller2_fun, M_VG_ERRTXT );
-
-      if (VG_(clo_backtrace_size) > 3) {
-         VG_(what_obj_and_fun_is_this)
-            ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT,
-                                  caller3_fun, M_VG_ERRTXT );
-      }
+   for (i = 0; i < VG_N_SUPP_CALLERS && i < VG_(clo_backtrace_size); i++) {
+      get_objname_fnname ( err->where->eips[i], 
+                           caller_obj[i], M_VG_ERRTXT,
+                           caller_fun[i], M_VG_ERRTXT );
    }
 
    /* See if the error context matches any suppression. */
    for (su = vg_suppressions; su != NULL; su = su->next) {
-      switch (su->skind) {
-         case FreeS:  case PThread:
-         case Param:  case Value0: su_size = 0; break;
-         case Value1: case Addr1:  su_size = 1; break;
-         case Value2: case Addr2:  su_size = 2; break;
-         case Value4: case Addr4:  su_size = 4; break;
-         case Value8: case Addr8:  su_size = 8; break;
-         default: VG_(panic)("errcontext_matches_suppression");
+      if (supp_matches_error(su, err) &&
+          supp_matches_callers(su, caller_obj, caller_fun)) {
+         return su;
       }
-      switch (su->skind) {
-         case Param:
-            if (ec->ekind != ParamErr) continue;
-            if (!STREQ(su->param, ec->syscall_param)) continue;
-            break;
-         case Value0: case Value1: case Value2: case Value4: case Value8:
-            if (ec->ekind != ValueErr) continue;
-            if (ec->size  != su_size)  continue;
-            break;
-         case Addr1: case Addr2: case Addr4: case Addr8:
-            if (ec->ekind != AddrErr) continue;
-            if (ec->size  != su_size) continue;
-            break;
-         case FreeS:
-            if (ec->ekind != FreeErr 
-                && ec->ekind != FreeMismatchErr) continue;
-            break;
-         case PThread:
-            if (ec->ekind != PThreadErr) continue;
-            break;
-      }
-
-      switch (su->caller0_ty) {
-         case ObjName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_obj)) continue;
-                       break;
-         case FunName: if (!VG_(stringMatch)(su->caller0, 
-                                             caller0_fun)) continue;
-                       break;
-         default: goto baaaad;
-      }
-
-      if (su->caller1 != NULL) {
-         vg_assert(VG_(clo_backtrace_size) >= 2);
-         switch (su->caller1_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller1, 
-                                                caller1_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) {
-         switch (su->caller2_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller2, 
-                                                caller2_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) {
-         switch (su->caller3_ty) {
-            case ObjName: if (!VG_(stringMatch)(su->caller3,
-                                                caller3_obj)) continue;
-                          break;
-            case FunName: if (!VG_(stringMatch)(su->caller3, 
-                                                caller3_fun)) continue;
-                          break;
-            default: goto baaaad;
-         }
-      }
-
-      return su;
    }
-
-   return NULL;
-
-  baaaad:
-   VG_(panic)("is_suppressible_error");
+   return NULL;      /* no matches */
 
 #  undef STREQ
 }
diff --git a/vg_execontext.c b/vg_execontext.c
index 4da1b31..fe85fa0 100644
--- a/vg_execontext.c
+++ b/vg_execontext.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 
 /*------------------------------------------------------------*/
@@ -109,39 +108,40 @@
 
 
 /* Compare two ExeContexts, comparing all callers. */
-Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 )
+Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
 {
-   vg_ec_cmpAlls++;
-   /* Just do pointer comparison. */
-   if (e1 != e2) return False;
-   return True;
-}
+   if (e1 == NULL || e2 == NULL) 
+      return False;
+   switch (res) {
+   case Vg_LowRes:
+      /* Just compare the top two callers. */
+      vg_ec_cmp2s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
+      return True;
 
+   case Vg_MedRes:
+      /* Just compare the top four callers. */
+      vg_ec_cmp4s++;
+      if (e1->eips[0] != e2->eips[0]
+          || e1->eips[1] != e2->eips[1]) return False;
 
-/* Compare two ExeContexts, just comparing the top two callers. */
-Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp2s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
-   return True;
-}
+      if (VG_(clo_backtrace_size) < 3) return True;
+      if (e1->eips[2] != e2->eips[2]) return False;
 
+      if (VG_(clo_backtrace_size) < 4) return True;
+      if (e1->eips[3] != e2->eips[3]) return False;
+      return True;
 
-/* Compare two ExeContexts, just comparing the top four callers. */
-Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 )
-{
-   vg_ec_cmp4s++;
-   if (e1->eips[0] != e2->eips[0]
-       || e1->eips[1] != e2->eips[1]) return False;
+   case Vg_HighRes:
+      vg_ec_cmpAlls++;
+      /* Compare them all -- just do pointer comparison. */
+      if (e1 != e2) return False;
+      return True;
 
-   if (VG_(clo_backtrace_size) < 3) return True;
-   if (e1->eips[2] != e2->eips[2]) return False;
-
-   if (VG_(clo_backtrace_size) < 4) return True;
-   if (e1->eips[3] != e2->eips[3]) return False;
-
-   return True;
+   default:
+      VG_(panic)("VG_(eq_ExeContext): unrecognised VgRes");
+   }
 }
 
 
@@ -156,11 +156,12 @@
 
    In order to be thread-safe, we pass in the thread's %EIP and %EBP.
 */
-ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                  Addr eip, Addr ebp )
+ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                   Addr ebp_min, Addr ebp_max_orig )
 {
    Int         i;
    Addr        eips[VG_DEEPEST_BACKTRACE];
+   Addr        ebp_max;
    Bool        same;
    UInt        hash;
    ExeContext* new_ec;
@@ -173,29 +174,53 @@
 
    /* First snaffle %EIPs from the client's stack into eips[0
       .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail
-      goes cold. */
+      goes cold, which we guess to be when %ebp is not a reasonable
+      stack location.  We also assert that %ebp increases down the chain. */
 
-   for (i = 0; i < VG_(clo_backtrace_size); i++)
+   // Gives shorter stack trace for tests/badjump.c
+   // JRS 2002-aug-16: I don't think this is a big deal; looks ok for
+   // most "normal" backtraces.
+   // NJN 2002-sep-05: traces for pthreaded programs are particularly bad.
+
+   // JRS 2002-sep-17: hack, to round up ebp_max to the end of the
+   // current page, at least.  Dunno if it helps.
+   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
+   ebp_max = (ebp_max_orig + VKI_BYTES_PER_PAGE - 1) 
+                & ~(VKI_BYTES_PER_PAGE - 1);
+   ebp_max -= sizeof(Addr);
+
+   /* Assertion broken before main() is reached in pthreaded programs;  the
+    * offending stack traces only have one item.  --njn, 2002-aug-16 */
+   /* vg_assert(ebp_min <= ebp_max);*/
+
+   /* Checks the stack isn't riduculously big */
+   vg_assert(ebp_min + 4000000 > ebp_max);
+
+   //   VG_(printf)("%p -> %p\n", ebp_max_orig, ebp_max);
+   eips[0] = eip;
+   //   VG_(printf)("\nSNAP: %p .. %p, EBP=%p\n", ebp_min, ebp_max, ebp  );
+   //   VG_(printf)("   : %p\n", eips[0]);
+   /* Get whatever we safely can ... */
+   for (i = 1; i < VG_(clo_backtrace_size); i++) {
+      if (!(ebp_min <= ebp && ebp <= ebp_max)) {
+         //VG_(printf)("... out of range %p\n", ebp);
+         break; /* ebp gone baaaad */
+      }
+      // NJN 2002-sep-17: monotonicity doesn't work -- gives wrong traces...
+      //     if (ebp >= ((UInt*)ebp)[0]) {
+      //   VG_(printf)("nonmonotonic\n");
+      //    break; /* ebp gone nonmonotonic */
+      // }
+      eips[i] = ((UInt*)ebp)[1];  /* ret addr */
+      ebp     = ((UInt*)ebp)[0];  /* old ebp */
+      //VG_(printf)("     %p\n", eips[i]);
+   }
+
+   /* Put zeroes in the rest. */
+   for (;  i < VG_(clo_backtrace_size); i++) {
       eips[i] = 0;
-   
-#  define GET_CALLER(lval)                                        \
-   if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) {          \
-      lval = ((UInt*)ebp)[1];  /* ret addr */                     \
-      ebp  = ((UInt*)ebp)[0];  /* old ebp */                      \
-   } else {                                                       \
-      lval = ebp = 0;                                             \
    }
 
-   if (skip_top_frame) {
-      for (i = 0; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   } else {
-      eips[0] = eip;
-      for (i = 1; i < VG_(clo_backtrace_size); i++)
-         GET_CALLER(eips[i]);
-   }
-#  undef GET_CALLER
-
    /* Now figure out if we've seen this one before.  First hash it so
       as to determine the list number. */
 
@@ -228,19 +253,16 @@
 
    if (list != NULL) {
       /* Yay!  We found it.  */
-      VGP_POPCC;
+      VGP_POPCC(VgpExeContext);
       return list;
    }
 
    /* Bummer.  We have to allocate a new context record. */
    vg_ec_totstored++;
 
-   new_ec 
-      = VG_(malloc)( 
-           VG_AR_EXECTXT, 
-           sizeof(struct _ExeContextRec *) 
-              + VG_(clo_backtrace_size) * sizeof(Addr) 
-        );
+   new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, 
+                               sizeof(struct _ExeContext *) 
+                               + VG_(clo_backtrace_size) * sizeof(Addr) );
 
    for (i = 0; i < VG_(clo_backtrace_size); i++)
       new_ec->eips[i] = eips[i];
@@ -248,10 +270,16 @@
    new_ec->next = vg_ec_list[hash];
    vg_ec_list[hash] = new_ec;
 
-   VGP_POPCC;
+   VGP_POPCC(VgpExeContext);
    return new_ec;
 }
 
+ExeContext* VG_(get_ExeContext) ( ThreadState *tst )
+{
+   return VG_(get_ExeContext2)( tst->m_eip, tst->m_ebp, tst->m_esp, 
+                                tst->stack_highest_word );
+}
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_execontext.c ---*/
diff --git a/vg_from_ucode.c b/vg_from_ucode.c
index 26f1613..e99bfaa 100644
--- a/vg_from_ucode.c
+++ b/vg_from_ucode.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -35,10 +35,10 @@
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Instruction emission -- turning final uinstrs back   ---*/
@@ -52,7 +52,7 @@
    do this, calls and jmps to fixed addresses must specify the address
    by first loading it into a register, and jump to/call that
    register.  Fortunately, the only jump to a literal is the jump back
-   to vg_dispatch, and only %eax is live then, conveniently.  Ucode
+   to vg_dispatch, and only %eax is live then, conveniently.  UCode
    call insns may only have a register as target anyway, so there's no
    need to do anything fancy for them.
 
@@ -71,19 +71,104 @@
 static Int    emitted_code_used;
 static Int    emitted_code_size;
 
+/* Statistics about C functions called from generated code. */
+static UInt ccalls                 = 0;
+static UInt ccall_reg_saves        = 0;
+static UInt ccall_args             = 0;
+static UInt ccall_arg_setup_instrs = 0;
+static UInt ccall_stack_clears     = 0;
+static UInt ccall_retvals          = 0;
+static UInt ccall_retval_movs      = 0;
+
+/* Statistics about frequency of each UInstr */
+typedef
+   struct {
+      UInt counts;
+      UInt size;
+   } Histogram;
+
+/* Automatically zeroed because it's static. */
+static Histogram histogram[100];     
+
+void VG_(print_ccall_stats)(void)
+{
+   VG_(message)(Vg_DebugMsg,
+                "   ccalls: %u C calls, %u%% saves+restores avoided"
+                " (%d bytes)",
+                ccalls, 
+                100-(UInt)(ccall_reg_saves/(double)(ccalls*3)*100),
+                ((ccalls*3) - ccall_reg_saves)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %u args, avg 0.%d setup instrs each (%d bytes)", 
+                ccall_args, 
+               (UInt)(ccall_arg_setup_instrs/(double)ccall_args*100),
+               (ccall_args - ccall_arg_setup_instrs)*2);
+   VG_(message)(Vg_DebugMsg,
+                "           %d%% clear the stack (%d bytes)", 
+               (UInt)(ccall_stack_clears/(double)ccalls*100),
+               (ccalls - ccall_stack_clears)*3);
+   VG_(message)(Vg_DebugMsg,
+                "           %u retvals, %u%% of reg-reg movs avoided (%d bytes)",
+                ccall_retvals,
+                ( ccall_retvals == 0 
+                ? 100
+                : 100-(UInt)(ccall_retval_movs / 
+                             (double)ccall_retvals*100)),
+                (ccall_retvals-ccall_retval_movs)*2);
+}
+
+void VG_(print_UInstr_histogram)(void)
+{
+   Int i, j;
+   UInt total_counts = 0;
+   UInt total_size   = 0;
+   
+   for (i = 0; i < 100; i++) {
+      total_counts += histogram[i].counts;
+      total_size   += histogram[i].size;
+   }
+
+   VG_(printf)("-- UInstr frequencies -----------\n");
+   for (i = 0; i < 100; i++) {
+      if (0 != histogram[i].counts) {
+
+         UInt count_pc = 
+            (UInt)(histogram[i].counts/(double)total_counts*100 + 0.5);
+         UInt size_pc  = 
+            (UInt)(histogram[i].size  /(double)total_size  *100 + 0.5);
+         UInt avg_size =
+            (UInt)(histogram[i].size / (double)histogram[i].counts + 0.5);
+
+         VG_(printf)("%-7s:%8u (%2u%%), avg %2dB (%2u%%) |", 
+                     VG_(nameUOpcode)(True, i), 
+                     histogram[i].counts, count_pc, 
+                     avg_size, size_pc);
+
+         for (j = 0; j < size_pc; j++) VG_(printf)("O");
+         VG_(printf)("\n");
+
+      } else {
+         vg_assert(0 == histogram[i].size);
+      }
+   }
+
+   VG_(printf)("total UInstrs %u, total size %u\n", total_counts, total_size);
+}
+
 static void expandEmittedCode ( void )
 {
    Int    i;
-   UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size);
+   UChar *tmp = VG_(arena_malloc)(VG_AR_JITTER, 2 * emitted_code_size);
    /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */
    for (i = 0; i < emitted_code_size; i++)
       tmp[i] = emitted_code[i];
-   VG_(jitfree)(emitted_code);
+   VG_(arena_free)(VG_AR_JITTER, emitted_code);
    emitted_code = tmp;
    emitted_code_size *= 2;
 }
 
-static __inline__ void emitB ( UInt b )
+/* Local calls will be inlined, cross-module ones not */
+__inline__ void VG_(emitB) ( UInt b )
 {
    if (dis) {
       if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b);
@@ -95,29 +180,26 @@
    emitted_code_used++;
 }
 
-static __inline__ void emitW ( UInt l )
+__inline__ void VG_(emitW) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
 }
 
-static __inline__ void emitL ( UInt l )
+__inline__ void VG_(emitL) ( UInt l )
 {
-   emitB ( (l) & 0x000000FF );
-   emitB ( (l >> 8) & 0x000000FF );
-   emitB ( (l >> 16) & 0x000000FF );
-   emitB ( (l >> 24) & 0x000000FF );
+   VG_(emitB) ( (l) & 0x000000FF );
+   VG_(emitB) ( (l >> 8) & 0x000000FF );
+   VG_(emitB) ( (l >> 16) & 0x000000FF );
+   VG_(emitB) ( (l >> 24) & 0x000000FF );
 }
 
-static __inline__ void newEmit ( void )
+__inline__ void VG_(newEmit) ( void )
 {
    if (dis)
       VG_(printf)("\t       %4d: ", emitted_code_used );
 }
 
-/* Is this a callee-save register, in the normal C calling convention?  */
-#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI)
-
 
 /*----------------------------------------------------*/
 /*--- Addressing modes                             ---*/
@@ -144,8 +226,8 @@
 static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg )
 {
    /* ($ADDR), reg */
-   emitB ( mkModRegRM(0, reg, 5) );
-   emitL ( addr );
+   VG_(emitB) ( mkModRegRM(0, reg, 5) );
+   VG_(emitL) ( addr );
 }
 
 static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg )
@@ -154,26 +236,26 @@
    if (regmem == R_ESP) 
       VG_(panic)("emit_amode_regmem_reg");
    if (regmem == R_EBP) {
-      emitB ( mkModRegRM(1, reg, 5) );
-      emitB ( 0x00 );
+      VG_(emitB) ( mkModRegRM(1, reg, 5) );
+      VG_(emitB) ( 0x00 );
    } else {
-      emitB( mkModRegRM(0, reg, regmem) );
+      VG_(emitB)( mkModRegRM(0, reg, regmem) );
    }
 }
 
-static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_amode_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
    if (regmem == R_ESP)
       VG_(panic)("emit_amode_offregmem_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a large offset */
       /* d32(regmem), reg */
-      emitB ( mkModRegRM(2, reg, regmem) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, regmem) );
+      VG_(emitL) ( off );
    } else {
       /* d8(regmem), reg */
-      emitB ( mkModRegRM(1, reg, regmem) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, regmem) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
@@ -184,27 +266,27 @@
       VG_(panic)("emit_amode_sib_reg(ESP)");
    if (off < -128 || off > 127) {
       /* Use a 32-bit offset */
-      emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitL ( off );
+      VG_(emitB) ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitL) ( off );
    } else {
       /* Use an 8-bit offset */
-      emitB ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
-      emitB ( mkSIB( scale, regindex, regbase ) );
-      emitB ( off & 0xFF );
+      VG_(emitB) ( mkModRegRM(1, reg, 4) ); /* SIB with 8-bit displacement */
+      VG_(emitB) ( mkSIB( scale, regindex, regbase ) );
+      VG_(emitB) ( off & 0xFF );
    }
 }
 
-static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg )
+void VG_(emit_amode_ereg_greg) ( Int e_reg, Int g_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg )
 {
    /* other_reg, reg */
-   emitB ( mkModRegRM(3, g_reg, e_reg) );
+   VG_(emitB) ( mkModRegRM(3, g_reg, e_reg) );
 }
 
 
@@ -285,23 +367,23 @@
 /*--- v-size (4, or 2 with OSO) insn emitters      ---*/
 /*----------------------------------------------------*/
 
-static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg )
+void VG_(emit_movv_offregmem_reg) ( Int sz, Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n", 
                    nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg )
+void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n", 
                    nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg));
@@ -309,9 +391,9 @@
 
 static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x8B ); /* MOV Ev, Gv */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t(%s), %s\n",
@@ -320,40 +402,39 @@
 
 static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2));
 }
 
-static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 )
+void VG_(emit_movv_reg_reg) ( Int sz, Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0x89 ); /* MOV Gv, Ev */
-   emit_amode_ereg_greg ( reg2, reg1 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
+   VG_(emit_amode_ereg_greg) ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t%s, %s\n", 
                    nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc, 
-                                       UInt lit, Int reg )
+void VG_(emit_nonshiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) {
       /* short form OK */
-      emitB ( 0x83 ); /* Grp1 Ib,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      emitB ( lit & 0x000000FF );
+      VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      VG_(emitB) ( lit & 0x000000FF );
    } else {
-      emitB ( 0x81 ); /* Grp1 Iv,Ev */
-      emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-      if (sz == 2) emitW ( lit ); else emitL ( lit );
+      VG_(emitB) ( 0x81 ); /* Grp1 Iv,Ev */
+      VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+      if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    }
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n", 
@@ -361,13 +442,13 @@
                    lit, nameIReg(sz,reg));
 }
 
-static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg )
+void VG_(emit_shiftopv_lit_reg) ( Int sz, Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xC1 ); /* Grp2 Ib,Ev */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xC1 ); /* Grp2 Ib,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz), 
@@ -376,12 +457,12 @@
 
 static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xD3 ); /* Grp2 CL,Ev */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xD3 ); /* Grp2 CL,Ev */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(sz) );
@@ -389,11 +470,11 @@
 
 static void emit_shiftopb_cl_stack0 ( Opcode opc )
 {
-   newEmit();
-   emitB ( 0xD2 ); /* Grp2 CL,Eb */
-   emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
-   emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */
-   emitB ( 0x00 ); /* the d8 displacement */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xD2 ); /* Grp2 CL,Eb */
+   VG_(emitB) ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) );
+   VG_(emitB) ( 0x24 ); /* a SIB, I think `d8(%esp)' */
+   VG_(emitB) ( 0x00 ); /* the d8 displacement */
    if (dis)
       VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n",
                   VG_(nameUOpcode)(False,opc), nameISize(1) );
@@ -402,28 +483,28 @@
 static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), nameISize(sz),
                    off, nameIReg(4,areg), nameIReg(sz,reg));
 }
 
-static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc, 
+void VG_(emit_nonshiftopv_reg_reg) ( Int sz, Opcode opc, 
                                        Int reg1, Int reg2 )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
 #  if 0
    /* Perfectly correct, but the GNU assembler uses the other form.
       Therefore we too use the other form, to aid verification. */
-   emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(emitB) ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
 #  else
-   emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
+   VG_(emitB) ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */
    emit_amode_greg_ereg ( reg1, reg2 );
 #  endif
    if (dis)
@@ -432,134 +513,134 @@
                    nameIReg(sz,reg1), nameIReg(sz,reg2));
 }
 
-static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_reg) ( Int sz, UInt lit, Int reg )
 {
    if (lit == 0) {
-      emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg );
+      VG_(emit_nonshiftopv_reg_reg) ( sz, XOR, reg, reg );
       return;
    }
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
-   emitB ( 0xB8+reg ); /* MOV imm, Gv */
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
+   VG_(emitB) ( 0xB8+reg ); /* MOV imm, Gv */
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n", 
                    nameISize(sz), lit, nameIReg(sz,reg));
 }
 
-static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg )
+void VG_(emit_unaryopv_reg) ( Int sz, Opcode opc, Int reg )
 {
-   newEmit();
-   if (sz == 2) emitB ( 0x66 );
+   VG_(newEmit)();
+   if (sz == 2) VG_(emitB) ( 0x66 );
    switch (opc) {
       case NEG:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tneg%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case NOT:
-         emitB ( 0xF7 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF7 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnot%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case DEC:
-         emitB ( 0x48 + reg );
+         VG_(emitB) ( 0x48 + reg );
          if (dis)
             VG_(printf)( "\n\t\tdec%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       case INC:
-         emitB ( 0x40 + reg );
+         VG_(emitB) ( 0x40 + reg );
          if (dis)
             VG_(printf)( "\n\t\tinc%c\t%s\n", 
                          nameISize(sz), nameIReg(sz,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopv_reg");
+         VG_(panic)("VG_(emit_unaryopv_reg)");
    }
 }
 
-static void emit_pushv_reg ( Int sz, Int reg )
+void VG_(emit_pushv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x50 + reg );
+   VG_(emitB) ( 0x50 + reg );
    if (dis)
       VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_popv_reg ( Int sz, Int reg )
+void VG_(emit_popv_reg) ( Int sz, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 ); 
+      VG_(emitB) ( 0x66 ); 
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0x58 + reg );
+   VG_(emitB) ( 0x58 + reg );
    if (dis)
       VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg));
 }
 
-static void emit_pushl_lit8 ( Int lit8 )
+void VG_(emit_pushl_lit32) ( UInt int32 )
+{  
+   VG_(newEmit)();
+   VG_(emitB) ( 0x68 );
+   VG_(emitL) ( int32 );
+   if (dis)
+      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
+}  
+
+void VG_(emit_pushl_lit8) ( Int lit8 )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x6A );
-   emitB ( (UChar)((UInt)lit8) );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x6A );
+   VG_(emitB) ( (UChar)((UInt)lit8) );
    if (dis)
       VG_(printf)("\n\t\tpushl $%d\n", lit8 );
 }
 
-static void emit_pushl_lit32 ( UInt int32 )
+void VG_(emit_cmpl_zero_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x68 );
-   emitL ( int32 );
-   if (dis)
-      VG_(printf)("\n\t\tpushl $0x%x\n", int32 );
-}
-
-static void emit_cmpl_zero_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0x83 );
-   emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emit_amode_ereg_greg) ( reg, 7 /* Grp 3 opcode for CMP */ );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_ECX ( Int reg )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg, R_ECX );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg, R_ECX );
    if (dis) 
       VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg));
 }
 
-static void emit_swapl_reg_EAX ( Int reg )
+void VG_(emit_swapl_reg_EAX) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x90 + reg ); /* XCHG Gv,eAX */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x90 + reg ); /* XCHG Gv,eAX */
    if (dis) 
       VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg));
 }
 
 static void emit_swapl_reg_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x87 ); /* XCHG Gv,Ev */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x87 ); /* XCHG Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis) 
       VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), 
                   nameIReg(4,reg2));
@@ -567,65 +648,33 @@
 
 static void emit_bswapl_reg ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0xC8 + reg ); /* BSWAP r32 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0xC8 + reg ); /* BSWAP r32 */
    if (dis) 
       VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg));
 }
 
 static void emit_movl_reg_reg ( Int regs, Int regd )
 {
-   newEmit();
-   emitB ( 0x89 ); /* MOV Gv,Ev */
-   emit_amode_ereg_greg ( regd, regs );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x89 ); /* MOV Gv,Ev */
+   VG_(emit_amode_ereg_greg) ( regd, regs );
    if (dis) 
       VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd));
 }
 
-static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg )
+void VG_(emit_movv_lit_offregmem) ( Int sz, UInt lit, Int off, Int memreg )
 {
-   newEmit();
+   VG_(newEmit)();
    if (sz == 2) {
-      emitB ( 0x66 );
+      VG_(emitB) ( 0x66 );
    } else {
       vg_assert(sz == 4);
    }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $0x%x, %s\n", nameISize(sz), 
-                                            lit, nameIReg(sz,reg));
-}
-
-static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xF7 ); /* Grp3 Ev */
-   emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
-   if (dis)
-      VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", 
-                  nameISize(sz), lit, off, nameIReg(4,reg) );
-}
-
-static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   if (sz == 2) {
-      emitB ( 0x66 );
-   } else {
-      vg_assert(sz == 4);
-   }
-   emitB ( 0xC7 ); /* Grp11 Ev */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   if (sz == 2) emitW ( lit ); else emitL ( lit );
+   VG_(emitB) ( 0xC7 ); /* Grp11 Ev */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
    if (dis)
       VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", 
                    nameISize(sz), lit, off, nameIReg(4,memreg) );
@@ -638,35 +687,35 @@
 
 /* There is some doubt as to whether C6 (Grp 11) is in the
    486 insn set.  ToDo: investigate. */
-static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg )
-{
-   newEmit();
-   emitB ( 0xC6 ); /* Grp11 Eb */
-   emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
-   emitB ( lit );
+void VG_(emit_movb_lit_offregmem) ( UInt lit, Int off, Int memreg )
+{                                     
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC6 ); /* Grp11 Eb */
+   VG_(emit_amode_offregmem_reg) ( off, memreg, 0 /* Grp11 subopcode for MOV */ );
+   VG_(emitB) ( lit ); 
    if (dis)
       VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", 
                    lit, off, nameIReg(4,memreg) );
-}
-
+}              
+              
 static void emit_nonshiftopb_offregmem_reg ( Opcode opc, 
                                              Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", 
                    VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), 
                    nameIReg(1,reg));
 }
 
-static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg )
+void VG_(emit_movb_reg_offregmem) ( Int reg, Int off, Int areg )
 {
    /* Could do better when reg == %al. */
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", 
                    nameIReg(1,reg), off, nameIReg(4,areg));
@@ -674,9 +723,9 @@
 
 static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
-   emit_amode_ereg_greg ( reg1, reg2 );
+   VG_(newEmit)();
+   VG_(emitB) ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */
+   VG_(emit_amode_ereg_greg) ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t%s, %s\n", 
                    VG_(nameUOpcode)(False,opc),
@@ -685,8 +734,8 @@
 
 static void emit_movb_reg_regmem ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x88 ); /* MOV G1, E1 */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
       VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), 
@@ -695,10 +744,10 @@
 
 static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0x80 ); /* Grp1 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) );
-   emitB ( lit & 0x000000FF );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x80 ); /* Grp1 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp1opcode(opc) );
+   VG_(emitB) ( lit & 0x000000FF );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc),
                                              lit, nameIReg(1,reg));
@@ -706,69 +755,68 @@
 
 static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xC0 ); /* Grp2 Ib,Eb */
-   emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC0 ); /* Grp2 Ib,Eb */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp2opcode(opc) );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)( "\n\t\t%sb\t$%d, %s\n", 
                    VG_(nameUOpcode)(False,opc),
                    lit, nameIReg(1,reg));
 }
 
-static void emit_unaryopb_reg ( Opcode opc, Int reg )
+void VG_(emit_unaryopb_reg) ( Opcode opc, Int reg )
 {
-   newEmit();
+   VG_(newEmit)();
    switch (opc) {
       case INC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(INC) );
          if (dis)
             VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg));
          break;
       case DEC:
-         emitB ( 0xFE );
-         emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) );
+         VG_(emitB) ( 0xFE );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp4opcode(DEC) );
          if (dis)
             VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg));
          break;
       case NOT:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NOT) );
          if (dis)
             VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg));
          break;
       case NEG:
-         emitB ( 0xF6 );
-         emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) );
+         VG_(emitB) ( 0xF6 );
+         VG_(emit_amode_ereg_greg) ( reg, mkGrp3opcode(NEG) );
          if (dis)
             VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg));
          break;
       default: 
-         VG_(panic)("emit_unaryopb_reg");
+         VG_(panic)("VG_(emit_unaryopb_reg)");
    }
 }
 
-static void emit_testb_lit_reg ( UInt lit, Int reg )
+void VG_(emit_testb_lit_reg) ( UInt lit, Int reg )
 {
-   newEmit();
-   emitB ( 0xF6 ); /* Grp3 Eb */
-   emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ );
-   emitB ( lit );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xF6 ); /* Grp3 Eb */
+   VG_(emit_amode_ereg_greg) ( reg, 0 /* Grp3 subopcode for TEST */ );
+   VG_(emitB) ( lit );
    if (dis)
       VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg));
 }
 
-
 /*----------------------------------------------------*/
 /*--- zero-extended load emitters                  ---*/
 /*----------------------------------------------------*/
 
-static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg )
+void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
-   emit_amode_offregmem_reg ( off, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", 
                    off, nameIReg(4,regmem), nameIReg(4,reg));
@@ -776,19 +824,19 @@
 
 static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), 
                                                nameIReg(4,reg2));
 }
 
-static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg )
+void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
-   emit_amode_offregmem_reg ( off, areg, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
+   VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n",
                    off, nameIReg(4,areg), nameIReg(4,reg));
@@ -796,8 +844,8 @@
 
 static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
       VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), 
@@ -811,9 +859,9 @@
 static void emit_get_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xA5 ); /* frstor d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off );
 }
@@ -821,9 +869,9 @@
 static void emit_put_fpu_state ( void )
 {
    Int off = 4 * VGOFF_(m_fpustate);
-   newEmit();
-   emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */
-   emitL ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0xDD ); VG_(emitB) ( 0xB5 ); /* fnsave d32(%ebp) */
+   VG_(emitL) ( off );
    if (dis)
       VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off );
 }
@@ -831,9 +879,9 @@
 static void emit_fpu_no_mem ( UChar first_byte, 
                               UChar second_byte )
 {
-   newEmit();
-   emitB ( first_byte );
-   emitB ( second_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
+   VG_(emitB) ( second_byte );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", 
                   (UInt)first_byte, (UInt)second_byte );
@@ -843,8 +891,8 @@
                               UChar second_byte_masked, 
                               Int reg )
 {
-   newEmit();
-   emitB ( first_byte );
+   VG_(newEmit)();
+   VG_(emitB) ( first_byte );
    emit_amode_regmem_reg ( reg, second_byte_masked >> 3 );
    if (dis)
       VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", 
@@ -857,27 +905,26 @@
 /*--- misc instruction emitters                    ---*/
 /*----------------------------------------------------*/
 
-static void emit_call_reg ( Int reg )
-{
-   newEmit();
-   emitB ( 0xFF ); /* Grp5 */
-   emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) );
-   if (dis)
+void VG_(emit_call_reg) ( Int reg )
+{           
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* Grp5 */
+   VG_(emit_amode_ereg_greg) ( reg, mkGrp5opcode(CALLM) );
+   if (dis) 
       VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) );
-}
-
-
+}              
+         
 static void emit_call_star_EBP_off ( Int byte_off )
 {
-  newEmit();
+  VG_(newEmit)();
   if (byte_off < -128 || byte_off > 127) {
-     emitB ( 0xFF );
-     emitB ( 0x95 );
-     emitL ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x95 );
+     VG_(emitL) ( byte_off );
   } else {
-     emitB ( 0xFF );
-     emitB ( 0x55 );
-     emitB ( byte_off );
+     VG_(emitB) ( 0xFF );
+     VG_(emitB) ( 0x55 );
+     VG_(emitB) ( byte_off );
   }
   if (dis)
      VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off );
@@ -887,24 +934,24 @@
 static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off )
 {
    vg_assert(lit8 >= -128 && lit8 < 128);
-   newEmit();
-   emitB ( 0x83 ); /* Grp1 Ib,Ev */
-   emit_amode_offregmem_reg ( off, regmem, 
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 ); /* Grp1 Ib,Ev */
+   VG_(emit_amode_offregmem_reg) ( off, regmem, 
                               0 /* Grp1 subopcode for ADD */ );
-   emitB ( lit8 & 0xFF );
+   VG_(emitB) ( lit8 & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, 
                                                nameIReg(4,regmem));
 }
 
 
-static void emit_add_lit_to_esp ( Int lit )
+void VG_(emit_add_lit_to_esp) ( Int lit )
 {
-   if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp");
-   newEmit();
-   emitB ( 0x83 );
-   emitB ( 0xC4 );
-   emitB ( lit & 0xFF );
+   if (lit < -128 || lit > 127) VG_(panic)("VG_(emit_add_lit_to_esp)");
+   VG_(newEmit)();
+   VG_(emitB) ( 0x83 );
+   VG_(emitB) ( 0xC4 );
+   VG_(emitB) ( lit & 0xFF );
    if (dis)
       VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit );
 }
@@ -914,11 +961,11 @@
 {
    /* movb %al, 0(%esp) */
    /* 88442400              movb    %al, 0(%esp) */
-   newEmit();
-   emitB ( 0x88 );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x88 );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" );
 }
@@ -927,11 +974,11 @@
 {
    /* movb 0(%esp), %al */
    /* 8A442400              movb    0(%esp), %al */
-   newEmit();
-   emitB ( 0x8A );
-   emitB ( 0x44 );
-   emitB ( 0x24 );
-   emitB ( 0x00 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8A );
+   VG_(emitB) ( 0x44 );
+   VG_(emitB) ( 0x24 );
+   VG_(emitB) ( 0x00 );
    if (dis)
       VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" );
 }
@@ -940,12 +987,12 @@
 /* Emit a jump short with an 8-bit signed offset.  Note that the
    offset is that which should be added to %eip once %eip has been
    advanced over this insn.  */
-static void emit_jcondshort_delta ( Condcode cond, Int delta )
+void VG_(emit_jcondshort_delta) ( Condcode cond, Int delta )
 {
    vg_assert(delta >= -128 && delta <= 127);
-   newEmit();
-   emitB ( 0x70 + (UInt)cond );
-   emitB ( (UChar)delta );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x70 + (UInt)cond );
+   VG_(emitB) ( (UChar)delta );
    if (dis)
       VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", 
                    VG_(nameCondcode)(cond), delta );
@@ -955,11 +1002,11 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0xFF ); /* PUSHL off(%ebp) */
-   emitB ( 0x75 );
-   emitB ( off );
-   emitB ( 0x9D ); /* POPFL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xFF ); /* PUSHL off(%ebp) */
+   VG_(emitB) ( 0x75 );
+   VG_(emitB) ( off );
+   VG_(emitB) ( 0x9D ); /* POPFL */
    if (dis)
       VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off );
 }
@@ -968,20 +1015,20 @@
 {
    Int off = 4 * VGOFF_(m_eflags);
    vg_assert(off >= 0 && off < 128);
-   newEmit();
-   emitB ( 0x9C ); /* PUSHFL */
-   emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */
-   emitB ( 0x45 );
-   emitB ( off );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x9C ); /* PUSHFL */
+   VG_(emitB) ( 0x8F ); /* POPL vg_m_state.m_eflags */
+   VG_(emitB) ( 0x45 );
+   VG_(emitB) ( off );
    if (dis)
       VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off );
 }
 
 static void emit_setb_reg ( Int reg, Condcode cond )
 {
-   newEmit();
-   emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond );
-   emit_amode_ereg_greg ( reg, 0 );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F ); VG_(emitB) ( 0x90 + (UChar)cond );
+   VG_(emit_amode_ereg_greg) ( reg, 0 );
    if (dis)
       VG_(printf)("\n\t\tset%s %s\n", 
                   VG_(nameCondcode)(cond), nameIReg(1,reg));
@@ -989,33 +1036,33 @@
 
 static void emit_ret ( void )
 {
-   newEmit();
-   emitB ( 0xC3 ); /* RET */
+   VG_(newEmit)();
+   VG_(emitB) ( 0xC3 ); /* RET */
    if (dis)
       VG_(printf)("\n\t\tret\n");
 }
 
-static void emit_pushal ( void )
+void VG_(emit_pushal) ( void )
 {
-   newEmit();
-   emitB ( 0x60 ); /* PUSHAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x60 ); /* PUSHAL */
    if (dis)
       VG_(printf)("\n\t\tpushal\n");
 }
 
-static void emit_popal ( void )
+void VG_(emit_popal) ( void )
 {
-   newEmit();
-   emitB ( 0x61 ); /* POPAL */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x61 ); /* POPAL */
    if (dis)
       VG_(printf)("\n\t\tpopal\n");
 }
 
 static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
-   emit_amode_offregmem_reg ( (Int)lit, regmem, reg );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
+   VG_(emit_amode_offregmem_reg) ( (Int)lit, regmem, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s), %s\n",
                   lit, nameIReg(4,regmem), nameIReg(4,reg) );
@@ -1024,8 +1071,8 @@
 static void emit_lea_sib_reg ( UInt lit, Int scale,
 			       Int regbase, Int regindex, Int reg )
 {
-   newEmit();
-   emitB ( 0x8D ); /* LEA M,Gv */
+   VG_(newEmit)();
+   VG_(emitB) ( 0x8D ); /* LEA M,Gv */
    emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg );
    if (dis)
       VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n",
@@ -1034,17 +1081,51 @@
                        nameIReg(4,reg) );
 }
 
-static void emit_AMD_prefetch_reg ( Int reg )
+void VG_(emit_AMD_prefetch_reg) ( Int reg )
 {
-   newEmit();
-   emitB ( 0x0F );
-   emitB ( 0x0D );
+   VG_(newEmit)();
+   VG_(emitB) ( 0x0F );
+   VG_(emitB) ( 0x0D );
    emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ );
    if (dis)
       VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) );
 }
 
 /*----------------------------------------------------*/
+/*--- Helper offset -> addr translation            ---*/
+/*----------------------------------------------------*/
+
+/* Finds the baseBlock offset of a skin-specified helper.
+ * Searches through compacts first, then non-compacts. */
+Int VG_(helper_offset)(Addr a)
+{
+   Int i;
+
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      if (VG_(compact_helper_addrs)[i] == a)
+         return VG_(compact_helper_offsets)[i];
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      if (VG_(noncompact_helper_addrs)[i] == a)
+         return VG_(noncompact_helper_offsets)[i];
+
+   /* Shouldn't get here */
+   VG_(printf)(
+      "\nCouldn't find offset of helper from its address (%p).\n"
+      "A helper function probably used hasn't been registered?\n\n", a);
+
+   VG_(printf)("      compact helpers: ");
+   for (i = 0; i < VG_(n_compact_helpers); i++)
+      VG_(printf)("%p ", VG_(compact_helper_addrs)[i]);
+
+   VG_(printf)("\n  non-compact helpers: ");
+   for (i = 0; i < VG_(n_noncompact_helpers); i++)
+      VG_(printf)("%p ", VG_(noncompact_helper_addrs)[i]);
+
+   VG_(printf)("\n");
+   VG_(skin_error)("Unfound helper");
+}
+
+/*----------------------------------------------------*/
 /*--- Instruction synthesisers                     ---*/
 /*----------------------------------------------------*/
 
@@ -1057,8 +1138,7 @@
 /* Synthesise a call to *baseBlock[offset], ie,
    call * (4 x offset)(%ebp).
 */
-static void synth_call_baseBlock_method ( Bool ensure_shortform, 
-                                          Int word_offset )
+void VG_(synth_call) ( Bool ensure_shortform, Int word_offset )
 {
    vg_assert(word_offset >= 0);
    vg_assert(word_offset < VG_BASEBLOCK_WORDS);
@@ -1067,42 +1147,237 @@
    emit_call_star_EBP_off ( 4 * word_offset );
 }
 
-static void synth_ccall_saveRegs ( void )
+static void maybe_emit_movl_reg_reg ( UInt src, UInt dst )
 {
-   emit_pushv_reg ( 4, R_EAX ); 
-   emit_pushv_reg ( 4, R_ECX ); 
-   emit_pushv_reg ( 4, R_EDX ); 
+   if (src != dst) {
+      VG_(emit_movv_reg_reg) ( 4, src, dst );
+      ccall_arg_setup_instrs++;
+   }
 }
+
+/* 'maybe' because it is sometimes skipped eg. for "movl %eax,%eax" */
+static void maybe_emit_movl_litOrReg_reg ( UInt litOrReg, Tag tag, UInt reg )
+{
+   if (RealReg == tag) {
+      maybe_emit_movl_reg_reg ( litOrReg, reg );
+   } else if (Literal == tag) {
+      VG_(emit_movv_lit_reg) ( 4, litOrReg, reg );
+      ccall_arg_setup_instrs++;
+   }
+   else
+      VG_(panic)("emit_movl_litOrReg_reg: unexpected tag");
+}
+
+static
+void emit_swapl_arg_regs ( UInt reg1, UInt reg2 )
+{
+   if        (R_EAX == reg1) {
+      VG_(emit_swapl_reg_EAX) ( reg2 );
+   } else if (R_EAX == reg2) {
+      VG_(emit_swapl_reg_EAX) ( reg1 );
+   } else {
+      emit_swapl_reg_reg ( reg1, reg2 );
+   }
+   ccall_arg_setup_instrs++;
+}
+
+static
+void emit_two_regs_args_setup ( UInt src1, UInt src2, UInt dst1, UInt dst2)
+{
+   if        (dst1 != src2) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+
+   } else if (dst2 != src1) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+
+   } else {
+      /* swap to break cycle */
+      emit_swapl_arg_regs ( dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_args_setup ( UInt src1, UInt src2, UInt src3,
+                                  UInt dst1, UInt dst2, UInt dst3)
+{
+   if        (dst1 != src2 && dst1 != src3) {
+      maybe_emit_movl_reg_reg ( src1, dst1 );
+      emit_two_regs_args_setup ( src2, src3, dst2, dst3 );
+
+   } else if (dst2 != src1 && dst2 != src3) {
+      maybe_emit_movl_reg_reg ( src2, dst2 );
+      emit_two_regs_args_setup ( src1, src3, dst1, dst3 );
+
+   } else if (dst3 != src1 && dst3 != src2) {
+      maybe_emit_movl_reg_reg ( src3, dst3 );
+      emit_two_regs_args_setup ( src1, src2, dst1, dst2 );
+      
+   } else {
+      /* break cycle */
+      if        (dst1 == src2 && dst2 == src3 && dst3 == src1) {
+         emit_swapl_arg_regs ( dst1, dst2 );
+         emit_swapl_arg_regs ( dst1, dst3 );
+
+      } else if (dst1 == src3 && dst2 == src1 && dst3 == src2) {
+         emit_swapl_arg_regs ( dst1, dst3 );
+         emit_swapl_arg_regs ( dst1, dst2 );
+
+      } else {
+         VG_(panic)("impossible 3-cycle");
+      }
+   }
+}
+
+static
+void emit_two_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                        UInt src1, UInt src2,
+                                        UInt dst1, UInt dst2)
+{
+   /* If either are lits, order doesn't matter */
+   if (Literal == tagv[src1] || Literal == tagv[src2]) {
+      maybe_emit_movl_litOrReg_reg ( argv[src1], tagv[src1], dst1 );
+      maybe_emit_movl_litOrReg_reg ( argv[src2], tagv[src2], dst2 );
+
+   } else {
+      emit_two_regs_args_setup ( argv[src1], argv[src2], dst1, dst2 );
+   }
+}
+
+static
+void emit_three_regs_or_lits_args_setup ( UInt argv[], Tag tagv[],
+                                          UInt src1, UInt src2, UInt src3,
+                                          UInt dst1, UInt dst2, UInt dst3)
+{
+   // SSS: fix this eventually -- make STOREV use two RealRegs?
+   /* Not supporting literals for 3-arg C functions -- they're only used
+      by STOREV which has 2 args */
+   vg_assert(RealReg == tagv[src1] &&
+             RealReg == tagv[src2] &&
+             RealReg == tagv[src3]);
+   emit_three_regs_args_setup ( argv[src1], argv[src2], argv[src3],
+                                dst1, dst2, dst3 );
+}
+
+/* Synthesise a call to a C function `fn' (which must be registered in
+   baseBlock) doing all the reg saving and arg handling work.
+ 
+   WARNING:  a UInstr should *not* be translated with synth_ccall followed
+   by some other x86 assembly code;  vg_liveness_analysis() doesn't expect
+   such behaviour and everything will fall over.
+ */
+void VG_(synth_ccall) ( Addr fn, Int argc, Int regparms_n, UInt argv[],
+                        Tag tagv[], Int ret_reg,
+                        RRegSet regs_live_before, RRegSet regs_live_after )
+{
+   Int  i;
+   Int  stack_used = 0;
+   Bool preserve_eax, preserve_ecx, preserve_edx;
+
+   vg_assert(0 <= regparms_n && regparms_n <= 3);
+
+   ccalls++;
+
+   /* If %e[acd]x is live before and after the C call, save/restore it.
+      Unless the return values clobbers the reg;  in this case we must not
+      save/restore the reg, because the restore would clobber the return
+      value.  (Before and after the UInstr really constitute separate live
+      ranges, but you miss this if you don't consider what happens during
+      the UInstr.) */
+#  define PRESERVE_REG(realReg)   \
+   (IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_before) &&   \
+    IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), regs_live_after)  &&   \
+    ret_reg != realReg)
+
+   preserve_eax = PRESERVE_REG(R_EAX);
+   preserve_ecx = PRESERVE_REG(R_ECX);
+   preserve_edx = PRESERVE_REG(R_EDX);
+
+#  undef PRESERVE_REG
+
+   /* Save caller-save regs as required */
+   if (preserve_eax) { VG_(emit_pushv_reg) ( 4, R_EAX ); ccall_reg_saves++; }
+   if (preserve_ecx) { VG_(emit_pushv_reg) ( 4, R_ECX ); ccall_reg_saves++; }
+   if (preserve_edx) { VG_(emit_pushv_reg) ( 4, R_EDX ); ccall_reg_saves++; }
+
+   /* Args are passed in two groups: (a) via stack (b) via regs.  regparms_n
+      is the number of args passed in regs (maximum 3 for GCC on x86). */
+
+   ccall_args += argc;
    
-static void synth_ccall_pushOneArg ( Int r1 )
-{
-   emit_pushv_reg ( 4, r1 );
-}
+   /* First push stack args (RealRegs or Literals) in reverse order. */
+   for (i = argc-1; i >= regparms_n; i--) {
+      switch (tagv[i]) {
+      case RealReg:
+         VG_(emit_pushv_reg) ( 4, argv[i] );
+         break;
+      case Literal:
+         /* Use short form of pushl if possible. */
+         if (argv[i] == VG_(extend_s_8to32) ( argv[i] ))
+            VG_(emit_pushl_lit8) ( VG_(extend_s_8to32)(argv[i]) );
+         else
+            VG_(emit_pushl_lit32)( argv[i] );
+         break;
+      default:
+         VG_(printf)("tag=%d\n", tagv[i]);
+         VG_(panic)("VG_(synth_ccall): bad tag");
+      }
+      stack_used += 4;
+      ccall_arg_setup_instrs++;
+   }
 
-static void synth_ccall_pushTwoArgs ( Int r1, Int r2 )
-{
-   /* must push in reverse order */
-   emit_pushv_reg ( 4, r2 );
-   emit_pushv_reg ( 4, r1 );
-}
+   /* Then setup args in registers (arg[123] --> %e[adc]x;  note order!).
+      If moving values between registers, be careful not to clobber any on
+      the way.  Happily we can use xchgl to swap registers.
+   */
+   switch (regparms_n) {
 
-/* Synthesise a call to *baseBlock[offset], ie,
-   call * (4 x offset)(%ebp) with arguments
-*/
-static void synth_ccall_call_clearStack_restoreRegs ( Int word_offset, 
-                                                      UInt n_args_bytes )
-{
-   vg_assert(word_offset >= 0);
-   vg_assert(word_offset < VG_BASEBLOCK_WORDS);
-   vg_assert(n_args_bytes <= 12);           /* Max 3 word-sized args */
-   vg_assert(0 == (n_args_bytes & 0x3));    /* Divisible by four */
+   /* Trickiest.  Args passed in %eax, %edx, and %ecx. */
+   case 3:
+      emit_three_regs_or_lits_args_setup ( argv, tagv, 0, 1, 2,
+                                           R_EAX, R_EDX, R_ECX );
+      break;
 
-   emit_call_star_EBP_off ( 4 * word_offset );
-   if ( 0 != n_args_bytes )
-      emit_add_lit_to_esp ( n_args_bytes );
-   emit_popv_reg ( 4, R_EDX ); 
-   emit_popv_reg ( 4, R_ECX ); 
-   emit_popv_reg ( 4, R_EAX ); 
+   /* Less-tricky.  Args passed in %eax and %edx. */
+   case 2:
+      emit_two_regs_or_lits_args_setup ( argv, tagv, 0, 1, R_EAX, R_EDX );
+      break;
+      
+   /* Easy.  Just move arg1 into %eax (if not already in there). */
+   case 1:  
+      maybe_emit_movl_litOrReg_reg ( argv[0], tagv[0], R_EAX );
+      break;
+
+   case 0:
+      break;
+
+   default:
+      VG_(panic)("VG_(synth_call): regparms_n value not in range 0..3");
+   }
+   
+   /* Call the function */
+   VG_(synth_call) ( False, VG_(helper_offset) ( fn ) );
+
+   /* Clear any args from stack */
+   if (0 != stack_used) {
+      VG_(emit_add_lit_to_esp) ( stack_used );
+      ccall_stack_clears++;
+   }
+
+   /* Move return value into ret_reg if necessary and not already there */
+   if (INVALID_REALREG != ret_reg) {
+      ccall_retvals++;
+      if (R_EAX != ret_reg) {
+         VG_(emit_movv_reg_reg) ( 4, R_EAX, ret_reg );
+         ccall_retval_movs++;
+      }
+   }
+
+   /* Restore live caller-save regs as required */
+   if (preserve_edx) VG_(emit_popv_reg) ( 4, R_EDX ); 
+   if (preserve_ecx) VG_(emit_popv_reg) ( 4, R_ECX ); 
+   if (preserve_eax) VG_(emit_popv_reg) ( 4, R_EAX ); 
 }
 
 static void load_ebp_from_JmpKind ( JmpKind jmpkind )
@@ -1110,15 +1385,15 @@
    switch (jmpkind) {
       case JmpBoring: 
          break;
-      case JmpCall:
       case JmpRet: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP );
+         break;
+      case JmpCall:
          break;
       case JmpSyscall: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP );
          break;
       case JmpClientReq: 
-         emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
+         VG_(emit_movv_lit_reg) ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP );
          break;
       default: 
          VG_(panic)("load_ebp_from_JmpKind");
@@ -1133,7 +1408,7 @@
 {
    load_ebp_from_JmpKind ( jmpkind );
    if (reg != R_EAX)
-      emit_movv_reg_reg ( 4, reg, R_EAX );
+      VG_(emit_movv_reg_reg) ( 4, reg, R_EAX );
    emit_ret();
 }
 
@@ -1142,7 +1417,7 @@
 static void synth_jmp_lit ( Addr addr, JmpKind jmpkind )
 {
    load_ebp_from_JmpKind ( jmpkind );
-   emit_movv_lit_reg ( 4, addr, R_EAX );
+   VG_(emit_movv_lit_reg) ( 4, addr, R_EAX );
    emit_ret();
 }
 
@@ -1163,7 +1438,7 @@
    6                    xyxyxy:
   */
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 5+1 );
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1176,8 +1451,8 @@
       000a C3                    ret
       next:
    */
-   emit_cmpl_zero_reg ( reg );
-   emit_jcondshort_delta ( CondNZ, 5+1 );
+   VG_(emit_cmpl_zero_reg) ( reg );
+   VG_(emit_jcondshort_delta) ( CondNZ, 5+1 );
    synth_jmp_lit ( addr, JmpBoring );
 }
 
@@ -1186,7 +1461,7 @@
 {
    /* Load the zero-extended literal into reg, at size l,
       regardless of the request size. */
-   emit_movv_lit_reg ( 4, lit, reg );
+   VG_(emit_movv_lit_reg) ( 4, lit, reg );
 }
 
 
@@ -1204,9 +1479,9 @@
 static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) 
 {
    switch (size) {
-      case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break;
-      case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break;
-      case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break;
+      case 4: VG_(emit_movv_offregmem_reg) ( 4, off, areg, reg ); break;
+      case 2: VG_(emit_movzwl_offregmem_reg) ( off, areg, reg ); break;
+      case 1: VG_(emit_movzbl_offregmem_reg) ( off, areg, reg ); break;
       default: VG_(panic)("synth_mov_offregmem_reg");
    }  
 }
@@ -1216,15 +1491,15 @@
                                       Int off, Int areg )
 {
    switch (size) {
-      case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break;
-      case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break;
+      case 4: VG_(emit_movv_reg_offregmem) ( 4, reg, off, areg ); break;
+      case 2: VG_(emit_movv_reg_offregmem) ( 2, reg, off, areg ); break;
       case 1: if (reg < 4) {
-                 emit_movb_reg_offregmem ( reg, off, areg ); 
+                 VG_(emit_movb_reg_offregmem) ( reg, off, areg ); 
               }
               else {
-                 emit_swapl_reg_EAX ( reg );
-                 emit_movb_reg_offregmem ( R_AL, off, areg );
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
+                 VG_(emit_movb_reg_offregmem) ( R_AL, off, areg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_mov_reg_offregmem");
@@ -1261,23 +1536,23 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 4, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 4, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: //if (rd_cc) emit_get_eflags();   (never needed --njn)
-              emit_unaryopv_reg ( 2, opcode, reg );
+              VG_(emit_unaryopv_reg) ( 2, opcode, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, reg );
+                 VG_(emit_unaryopb_reg) ( opcode, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  //if (rd_cc) emit_get_eflags();    (never needed --njn)
-                 emit_unaryopb_reg ( opcode, R_AL );
+                 VG_(emit_unaryopb_reg) ( opcode, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_unaryop_reg");
@@ -1293,11 +1568,11 @@
    /* NB! opcode is a uinstr opcode, not an x86 one! */
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 4, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 );
+              VG_(emit_nonshiftopv_reg_reg) ( 2, opcode, reg1, reg2 );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: { /* Horrible ... */
@@ -1377,11 +1652,11 @@
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg );
             if (wr_cc) emit_put_eflags();
          } else {
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
             if (rd_cc) emit_get_eflags();
             emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL );
             if (wr_cc) emit_put_eflags();
-            emit_swapl_reg_EAX ( reg );
+            VG_(emit_swapl_reg_EAX) ( reg );
          }
          break;
       default: 
@@ -1396,11 +1671,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_nonshiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1408,11 +1683,11 @@
                  emit_nonshiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_nonshiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_nonshiftop_lit_reg");
@@ -1424,19 +1699,19 @@
 {
    switch (size) {
       case 4: 
-         emit_pushv_reg ( 4, reg ); 
+         VG_(emit_pushv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_pushv_reg ( 2, reg ); 
+         VG_(emit_pushv_reg) ( 2, reg ); 
          break;
       /* Pray that we don't have to generate this really cruddy bit of
          code very often.  Could do better, but can I be bothered? */
       case 1: 
          vg_assert(reg != R_ESP); /* duh */
-         emit_add_lit_to_esp(-1);
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         VG_(emit_add_lit_to_esp)(-1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_AL_zeroESPmem();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          break;
      default: 
          VG_(panic)("synth_push_reg");
@@ -1448,18 +1723,18 @@
 {
    switch (size) {
       case 4: 
-         emit_popv_reg ( 4, reg ); 
+         VG_(emit_popv_reg) ( 4, reg ); 
          break;
       case 2: 
-         emit_popv_reg ( 2, reg ); 
+         VG_(emit_popv_reg) ( 2, reg ); 
          break;
       case 1:
          /* Same comment as above applies. */
          vg_assert(reg != R_ESP); /* duh */
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
          emit_movb_zeroESPmem_AL();
-         if (reg != R_EAX) emit_swapl_reg_EAX ( reg );
-         emit_add_lit_to_esp(1);
+         if (reg != R_EAX) VG_(emit_swapl_reg_EAX) ( reg );
+         VG_(emit_add_lit_to_esp)(1);
          break;
       default: VG_(panic)("synth_pop_reg");
    }
@@ -1491,11 +1766,11 @@
 {
    switch (size) {
       case 4: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 4, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 4, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 2: if (rd_cc) emit_get_eflags();
-              emit_shiftopv_lit_reg ( 2, opcode, lit, reg );
+              VG_(emit_shiftopv_lit_reg) ( 2, opcode, lit, reg );
               if (wr_cc) emit_put_eflags();
               break;
       case 1: if (reg < 4) {
@@ -1503,11 +1778,11 @@
                  emit_shiftopb_lit_reg ( opcode, lit, reg );
                  if (wr_cc) emit_put_eflags();
               } else {
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
                  if (rd_cc) emit_get_eflags();
                  emit_shiftopb_lit_reg ( opcode, lit, R_AL );
                  if (wr_cc) emit_put_eflags();
-                 emit_swapl_reg_EAX ( reg );
+                 VG_(emit_swapl_reg_EAX) ( reg );
               }
               break;
       default: VG_(panic)("synth_shiftop_lit_reg");
@@ -1521,9 +1796,9 @@
    if (reg < 4) {
       emit_setb_reg ( reg, cond );
    } else {
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
       emit_setb_reg ( R_AL, cond );
-      emit_swapl_reg_EAX ( reg );
+      VG_(emit_swapl_reg_EAX) ( reg );
    }
 }
 
@@ -1555,42 +1830,18 @@
 static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst )
 {
    emit_get_eflags();
-   emit_jcondshort_delta ( invertCondition(cond), 
+   VG_(emit_jcondshort_delta) ( invertCondition(cond), 
                            2 /* length of the next insn */ );
    emit_movl_reg_reg ( src, dst );
 }
 
 
-/* Synthesise a minimal test (and which discards result) of reg32
-   against lit.  It's always safe do simply
-      emit_testv_lit_reg ( 4, lit, reg32 )
-   but we try to do better when possible.
-*/
-static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 )
-{
-   if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) {
-      /* We can get away with a byte insn. */
-      emit_testb_lit_reg ( lit, reg32 );
-   }
-   else 
-   if ((lit & 0xFFFF0000) == 0) {
-      /* Literal fits in 16 bits; do a word insn. */
-      emit_testv_lit_reg ( 2, lit, reg32 );
-   }
-   else {
-      /* Totally general ... */
-      emit_testv_lit_reg ( 4, lit, reg32 );
-   }
-}
-
-
 /*----------------------------------------------------*/
 /*--- Top level of the uinstr -> x86 translation.  ---*/
 /*----------------------------------------------------*/
 
 /* Return the byte offset from %ebp (ie, into baseBlock)
    for the specified ArchReg or SpillNo. */
-
 static Int spillOrArchOffset ( Int size, Tag tag, UInt value )
 {
    if (tag == SpillNo) {
@@ -1621,14 +1872,15 @@
    VG_(panic)("spillOrArchOffset");
 }
 
-
 static Int eflagsOffset ( void )
 {
    return 4 * VGOFF_(m_eflags);
 }
 
 
-static Int shadowOffset ( Int arch )
+/* Return the byte offset from %ebp (ie, into baseBlock)
+   for the specified shadow register */
+Int VG_(shadowRegOffset) ( Int arch )
 {
    switch (arch) {
       case R_EAX: return 4 * VGOFF_(sh_eax);
@@ -1643,539 +1895,44 @@
    }
 }
 
-
-static Int shadowFlagsOffset ( void )
+Int VG_(shadowFlagsOffset) ( void )
 {
    return 4 * VGOFF_(sh_eflags);
 }
 
 
-static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg )
-{
-   Int i, j, helper_offw;
-   Int pushed[VG_MAX_REALREGS+2];
-   Int n_pushed;
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_LOADV4); break;
-      case 2: helper_offw = VGOFF_(helperc_LOADV2); break;
-      case 1: helper_offw = VGOFF_(helperc_LOADV1); break;
-      default: VG_(panic)("synth_LOADV");
-   }
-   n_pushed = 0;
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if (j == tv_reg || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-      pushed[n_pushed++] = j;
-   }
-   emit_pushv_reg ( 4, a_reg );
-   pushed[n_pushed++] = a_reg;
-   vg_assert(n_pushed <= VG_MAX_REALREGS+1);
-
-   synth_call_baseBlock_method ( False, helper_offw );
-   /* Result is in %eax; we need to get it to tv_reg. */
-   if (tv_reg != R_EAX)
-      emit_movv_reg_reg ( 4, R_EAX, tv_reg );
-
-   while (n_pushed > 0) {
-      n_pushed--;
-      if (pushed[n_pushed] == tv_reg) {
-         emit_add_lit_to_esp ( 4 );
-      } else {
-         emit_popv_reg ( 4, pushed[n_pushed] );
-      }
-   }
-}
-
-
-static void synth_STOREV ( Int sz,
-                           Int tv_tag, Int tv_val,
-                           Int a_reg )
-{
-   Int i, j, helper_offw;
-   vg_assert(tv_tag == RealReg || tv_tag == Literal);
-   switch (sz) {
-      case 4: helper_offw = VGOFF_(helperc_STOREV4); break;
-      case 2: helper_offw = VGOFF_(helperc_STOREV2); break;
-      case 1: helper_offw = VGOFF_(helperc_STOREV1); break;
-      default: VG_(panic)("synth_STOREV");
-   }
-   for (i = 0; i < VG_MAX_REALREGS; i++) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_pushv_reg ( 4, j );
-   }
-   if (tv_tag == RealReg) {
-      emit_pushv_reg ( 4, tv_val );
-   } else {
-     if (tv_val == VG_(extend_s_8to32)(tv_val))
-        emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) );
-     else
-        emit_pushl_lit32(tv_val);
-   }
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_popv_reg ( 4, a_reg );
-   if (tv_tag == RealReg) {
-      emit_popv_reg ( 4, tv_val );
-   } else {
-      emit_add_lit_to_esp ( 4 );
-   }
-   for (i = VG_MAX_REALREGS-1; i >= 0; i--) {
-      j = VG_(rankToRealRegNo) ( i );
-      if (VG_CALLEE_SAVED(j)) continue;
-      if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue;
-      emit_popv_reg ( 4, j );
-   }
-}
-
 
 static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg )
 {
    if (sz_src == 1 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 24, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 24, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 24, reg );
    }
    else if (sz_src == 2 && sz_dst == 4) {
-      emit_shiftopv_lit_reg ( 4, SHL, 16, reg );
-      emit_shiftopv_lit_reg ( 4, SAR, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SHL, 16, reg );
+      VG_(emit_shiftopv_lit_reg) ( 4, SAR, 16, reg );
    }
    else if (sz_src == 1 && sz_dst == 2) {
-      emit_shiftopv_lit_reg ( 2, SHL, 8, reg );
-      emit_shiftopv_lit_reg ( 2, SAR, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SHL, 8, reg );
+      VG_(emit_shiftopv_lit_reg) ( 2, SAR, 8, reg );
    }
    else
       VG_(panic)("synth_WIDEN");
 }
 
 
-static void synth_SETV ( Int sz, Int reg )
+static void synth_handle_esp_assignment ( Int i, Int reg,
+                                          RRegSet regs_live_before,
+                                          RRegSet regs_live_after )
 {
-   UInt val;
-   switch (sz) {
-      case 4: val = 0x00000000; break;
-      case 2: val = 0xFFFF0000; break;
-      case 1: val = 0xFFFFFF00; break;
-      case 0: val = 0xFFFFFFFE; break;
-      default: VG_(panic)("synth_SETV");
-   }
-   emit_movv_lit_reg ( 4, val, reg );
+   UInt argv[] = { reg };
+   Tag  tagv[] = { RealReg };
+
+   VG_(synth_ccall) ( (Addr) VG_(handle_esp_assignment), 1, 1, argv, tagv, 
+                      INVALID_REALREG, regs_live_before, regs_live_after);
 }
 
 
-static void synth_TESTV ( Int sz, Int tag, Int val )
-{
-   vg_assert(tag == ArchReg || tag == RealReg);
-   if (tag == ArchReg) {
-      switch (sz) {
-         case 4: 
-            emit_testv_lit_offregmem ( 
-               4, 0xFFFFFFFF, shadowOffset(val), R_EBP );
-            break;
-         case 2: 
-            emit_testv_lit_offregmem ( 
-               4, 0x0000FFFF, shadowOffset(val), R_EBP );
-            break;
-         case 1:
-            if (val < 4) {
-               emit_testv_lit_offregmem ( 
-                  4, 0x000000FF, shadowOffset(val), R_EBP );
-            } else {
-               emit_testv_lit_offregmem ( 
-                  4, 0x0000FF00, shadowOffset(val-4), R_EBP );
-            }
-            break;
-         case 0: 
-            /* should never happen */
-         default: 
-            VG_(panic)("synth_TESTV(ArchReg)");
-      }
-   } else {
-      switch (sz) {
-         case 4:
-            /* Works, but holds the entire 32-bit literal, hence
-               generating a 6-byte insn.  We want to know if any bits
-               in the reg are set, but since this is for the full reg,
-               we might as well compare it against zero, which can be
-               done with a shorter insn. */
-            /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */
-            emit_cmpl_zero_reg ( val );
-            break;
-         case 2:
-            synth_minimal_test_lit_reg ( 0x0000FFFF, val );
-            break;
-         case 1:
-            synth_minimal_test_lit_reg ( 0x000000FF, val );
-            break;
-         case 0:
-            synth_minimal_test_lit_reg ( 0x00000001, val );
-            break;
-         default: 
-            VG_(panic)("synth_TESTV(RealReg)");
-      }
-   }
-   emit_jcondshort_delta ( CondZ, 3 );
-   synth_call_baseBlock_method (
-      True, /* needed to guarantee that this insn is indeed 3 bytes long */
-      (sz==4 ? VGOFF_(helper_value_check4_fail)
-             : (sz==2 ? VGOFF_(helper_value_check2_fail)
-                      : sz == 1 ? VGOFF_(helper_value_check1_fail)
-                                : VGOFF_(helper_value_check0_fail)))
-   );
-}
-
-
-static void synth_GETV ( Int sz, Int arch, Int reg )
-{
-   /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */
-   switch (sz) {
-      case 4: 
-         emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg );
-         break;
-      case 2: 
-         emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg );
-         break;
-      case 1: 
-         if (arch < 4) {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg );
-         } else {
-            emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg );
-         }
-         emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg );
-         break;
-      default: 
-         VG_(panic)("synth_GETV");
-   }
-}
-
-
-static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch )
-{
-   if (srcTag == Literal) {
-     /* PUTV with a Literal is only ever used to set the corresponding
-        ArchReg to `all valid'.  Should really be a kind of SETV. */
-      UInt lit = lit_or_reg;
-      switch (sz) {
-         case 4:
-            vg_assert(lit == 0x00000000);
-            emit_movv_lit_offregmem ( 4, 0x00000000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            vg_assert(lit == 0xFFFF0000);
-            emit_movv_lit_offregmem ( 2, 0x0000, 
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            vg_assert(lit == 0xFFFFFF00);
-            if (arch < 4) {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch), R_EBP );
-            } else {
-               emit_movb_lit_offregmem ( 0x00, 
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(lit)");
-      }
-
-   } else {
-
-      UInt reg;
-      vg_assert(srcTag == RealReg);
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-         reg = R_EAX;
-      } else {
-         reg = lit_or_reg;
-      }
-
-      if (sz == 1) vg_assert(reg < 4);
-
-      switch (sz) {
-         case 4:
-            emit_movv_reg_offregmem ( 4, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 2:
-            emit_movv_reg_offregmem ( 2, reg,
-                                      shadowOffset(arch), R_EBP );
-            break;
-         case 1:
-            if (arch < 4) {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch), R_EBP );
-	    } else {
-               emit_movb_reg_offregmem ( reg,
-                                         shadowOffset(arch-4)+1, R_EBP );
-            }
-            break;
-         default: 
-            VG_(panic)("synth_PUTV(reg)");
-      }
-
-      if (sz == 1 && lit_or_reg >= 4) {
-         emit_swapl_reg_EAX ( lit_or_reg );
-      }
-   }
-}
-
-
-static void synth_GETVF ( Int reg )
-{
-   emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg );
-   /* paranoia only; should be unnecessary ... */
-   /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */
-}
-
-
-static void synth_PUTVF ( UInt reg )
-{
-   emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP );
-}
-
-
-static void synth_handle_esp_assignment ( Int reg )
-{
-   emit_pushal();
-   emit_pushv_reg ( 4, reg );
-   synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-
-
-static void synth_fpu_mem_check_actions ( Bool isWrite, 
-                                          Int size, Int a_reg )
-{
-   Int helper_offw
-     = isWrite ? VGOFF_(fpu_write_check)
-               : VGOFF_(fpu_read_check);
-   emit_pushal();
-   emit_pushl_lit8 ( size );
-   emit_pushv_reg ( 4, a_reg );
-   synth_call_baseBlock_method ( False, helper_offw );
-   emit_add_lit_to_esp ( 8 );   
-   emit_popal();
-}
-
-
-#if 0
-/* FixMe.  Useful for debugging. */
-void VG_(oink) ( Int n )
-{
-   VG_(printf)("OiNk(%d): ", n );
-   VG_(show_reg_tags)( &VG_(m_shadow) );
-}
-
-static void synth_OINK ( Int n )
-{
-   emit_pushal();
-   emit_movv_lit_reg ( 4, n, R_EBP );
-   emit_pushl_reg ( R_EBP );
-   emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP );
-   emit_call_reg ( R_EBP );
-   emit_add_lit_to_esp ( 4 );
-   emit_popal();
-}
-#endif
-
-static void synth_TAG1_op ( VgTagOp op, Int reg )
-{
-   switch (op) {
-
-      /* Scheme is
-            neg<sz> %reg          -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            or 0xFFFFFFFE, %reg   -- invalidate all bits except lowest
-      */
-      case VgT_PCast40:
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast20:
-         emit_unaryopv_reg(2, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-      case VgT_PCast10:
-         if (reg >= 4) {
-            emit_swapl_reg_EAX(reg);
-            emit_unaryopb_reg(NEG, R_EAX);
-            emit_swapl_reg_EAX(reg);
-         } else {
-            emit_unaryopb_reg(NEG, reg);
-         }
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg);
-         break;
-
-      /* Scheme is
-            andl $1, %reg -- %reg is 0 or 1
-            negl %reg -- %reg is 0 or 0xFFFFFFFF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast04:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         break;
-      case VgT_PCast02:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast01:
-         emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* Scheme is
-            shl $24, %reg -- make irrelevant bits disappear
-            negl %reg             -- CF = %reg==0 ? 0 : 1
-            sbbl %reg, %reg       -- %reg = -CF
-            and possibly an OR to invalidate unused bits.
-      */
-      case VgT_PCast14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         break;
-      case VgT_PCast12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_PCast11:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_unaryopv_reg(4, NEG, reg);
-         emit_nonshiftopv_reg_reg(4, SBB, reg, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg);
-         break;
-
-      /* We steal %ebp (a non-allocable reg) as a temporary:
-            pushl %ebp
-            movl %reg, %ebp
-            negl %ebp
-            orl %ebp, %reg
-            popl %ebp
-         This sequence turns out to be correct regardless of the 
-         operation width.
-      */
-      case VgT_Left4:
-      case VgT_Left2:
-      case VgT_Left1:
-         vg_assert(reg != R_EDI);
-         emit_movv_reg_reg(4, reg, R_EDI);
-         emit_unaryopv_reg(4, NEG, R_EDI);
-         emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg);
-         break;
-
-      /* These are all fairly obvious; do the op and then, if
-         necessary, invalidate unused bits. */
-      case VgT_SWiden14:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         break;
-      case VgT_SWiden24:
-         emit_shiftopv_lit_reg(4, SHL, 16, reg);
-         emit_shiftopv_lit_reg(4, SAR, 16, reg);
-         break;
-      case VgT_SWiden12:
-         emit_shiftopv_lit_reg(4, SHL, 24, reg);
-         emit_shiftopv_lit_reg(4, SAR, 24, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-      case VgT_ZWiden14:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         break;
-      case VgT_ZWiden24:
-         emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg);
-         break;
-      case VgT_ZWiden12:
-         emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG1_op");
-   }
-}
-
-
-static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd )
-{
-   switch (op) {
-
-      /* UifU is implemented by OR, since 1 means Undefined. */
-      case VgT_UifU4:
-      case VgT_UifU2:
-      case VgT_UifU1:
-      case VgT_UifU0:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-
-      /* DifD is implemented by AND, since 0 means Defined. */
-      case VgT_DifD4:
-      case VgT_DifD2:
-      case VgT_DifD1:
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         break;
-
-      /* ImproveAND(value, tags) = value OR tags.
-	 Defined (0) value 0s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveAND4_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         break;
-      case VgT_ImproveAND2_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveAND1_TQ:
-         emit_nonshiftopv_reg_reg(4, OR, regs, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      /* ImproveOR(value, tags) = (not value) OR tags.
-	 Defined (0) value 1s give defined (0); all other -> undefined (1).
-         value is in regs; tags is in regd. 
-         To avoid trashing value, this is implemented (re de Morgan) as
-               not (value AND (not tags))
-         Be paranoid and invalidate unused bits; I don't know whether 
-         or not this is actually necessary. */
-      case VgT_ImproveOR4_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         break;
-      case VgT_ImproveOR2_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd);
-         break;
-      case VgT_ImproveOR1_TQ:
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_reg_reg(4, AND, regs, regd);
-         emit_unaryopv_reg(4, NOT, regd);
-         emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd);
-         break;
-
-      default:
-         VG_(panic)("synth_TAG2_op");
-   }
-}
-
 /*----------------------------------------------------*/
 /*--- Generate code for a single UInstr.           ---*/
 /*----------------------------------------------------*/
@@ -2190,10 +1947,13 @@
    return (u->flags_w != FlagsEmpty); 
 }
 
-static void emitUInstr ( Int i, UInstr* u )
+static void emitUInstr ( UCodeBlock* cb, Int i, RRegSet regs_live_before )
 {
+   Int     old_emitted_code_used;
+   UInstr* u = &cb->instrs[i];
+
    if (dis)
-      VG_(ppUInstr)(i, u);
+      VG_(ppUInstrWithRegs)(i, u);
 
 #  if 0
    if (0&& VG_(translations_done) >= 600) {
@@ -2204,13 +1964,79 @@
    }
 #  endif
 
+   old_emitted_code_used = emitted_code_used;
+   
    switch (u->opcode) {
-
       case NOP: case CALLM_S: case CALLM_E: break;
 
       case INCEIP: {
-         vg_assert(u->tag1 == Lit16);
-         emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+        /* Note: Redundant INCEIP merging.  A potentially useful
+           performance enhancementa, but currently disabled.  Reason
+           is that it needs a surefire way to know if a UInstr might
+           give rise to a stack snapshot being taken.  The logic below
+           is correct (hopefully ...) for the core UInstrs, but is
+           incorrect if a skin has its own UInstrs, since the logic
+           currently assumes that none of them can cause a stack
+           trace, and that's just wrong.  Note this isn't
+           mission-critical -- the system still functions -- but will
+           cause incorrect source locations in some situations,
+           specifically for the memcheck skin.  This is known to
+           confuse programmers, understandable.  */
+#        if 0
+         Bool    can_skip;
+         Int     j;
+
+         /* Scan forwards to see if this INCEIP dominates (in the
+            technical sense) a later one, AND there are no CCALLs in
+            between.  If so, skip this one and instead add its count
+            with the later one. */
+         can_skip = True;
+	 j = i+1;
+         while (True) {
+            if (cb->instrs[j].opcode == CCALL 
+                || cb->instrs[j].opcode == CALLM) {
+               /* CCALL -- we can't skip this INCEIP. */
+               can_skip = False; 
+               break;
+            }
+            if (cb->instrs[j].opcode == INCEIP) {
+               /* Another INCEIP.  Check that the sum will fit. */
+               if (cb->instrs[i].val1 + cb->instrs[j].val1 > 127)
+                  can_skip = False;
+               break;
+            }
+            if (cb->instrs[j].opcode == JMP || cb->instrs[j].opcode == JIFZ) {
+               /* Execution is not guaranteed to get beyond this
+                  point.  Give up. */
+               can_skip = False; 
+               break;
+            }
+            j++;
+            /* Assertion should hold because all blocks should end in an
+               unconditional JMP, so the above test should get us out of
+               the loop at the end of a block. */
+            vg_assert(j < cb->used);
+         }
+         if (can_skip) {
+            /* yay!  Accumulate the delta into the next INCEIP. */
+            // VG_(printf)("skip INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(j > i);
+            vg_assert(j < cb->used);
+            vg_assert(cb->instrs[j].opcode == INCEIP);
+            vg_assert(cb->instrs[i].opcode == INCEIP);
+            vg_assert(cb->instrs[j].tag1 == Lit16);
+            vg_assert(cb->instrs[i].tag1 == Lit16);
+            cb->instrs[j].val1 += cb->instrs[i].val1;
+            /* do nothing now */
+         } else 
+#        endif
+
+         {
+            /* no, we really have to do this, alas */
+            // VG_(printf)("  do INCEIP %d\n", cb->instrs[i].val1);
+            vg_assert(u->tag1 == Lit16);
+            emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) );
+         }
          break;
       }
 
@@ -2240,41 +2066,10 @@
          break;
       }
 
-      case SETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         synth_SETV ( u->size, u->val1 );
-         break;
-      }
-
-      case STOREV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == RealReg);
-         synth_STOREV ( u->size, u->tag1, 
-                                 u->tag1==Literal ? u->lit32 : u->val1, 
-                                 u->val2 );
-         break;
-      }
-
       case STORE: {
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
          synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
-	 /* No longer possible, but retained for illustrative purposes.
-         if (u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-	 */
-         break;
-      }
-
-      case LOADV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         if (0 && VG_(clo_instrument))
-            emit_AMD_prefetch_reg ( u->val1 );
-         synth_LOADV ( u->size, u->val1, u->val2 );
          break;
       }
 
@@ -2285,47 +2080,6 @@
          break;
       }
 
-      case TESTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg);
-         synth_TESTV(u->size, u->tag1, u->val1);
-         break;
-      }
-
-      case GETV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == ArchReg);
-         vg_assert(u->tag2 == RealReg);
-         synth_GETV(u->size, u->val1, u->val2);
-         break;
-      }
-
-      case GETVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_GETVF(u->val1);
-         break;
-      }
-
-      case PUTV: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
-         vg_assert(u->tag2 == ArchReg);
-         synth_PUTV(u->size, u->tag1, 
-                             u->tag1==Literal ? u->lit32 : u->val1, 
-                             u->val2 );
-         break;
-      }
-
-      case PUTVF: {
-         vg_assert(VG_(clo_instrument));
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->size == 0);
-         synth_PUTVF(u->val1);
-         break;
-      }
-
       case GET: {
          vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo);
          vg_assert(u->tag2 == RealReg);
@@ -2344,15 +2098,23 @@
          if (u->tag2 == ArchReg 
              && u->val2 == R_ESP
              && u->size == 4
-             && VG_(clo_instrument)) {
-            synth_handle_esp_assignment ( u->val1 );
+             && (VG_(track_events).new_mem_stack         || 
+                 VG_(track_events).new_mem_stack_aligned ||
+                 VG_(track_events).die_mem_stack         ||
+                 VG_(track_events).die_mem_stack_aligned ||
+                 VG_(track_events).post_mem_write))
+         {
+            synth_handle_esp_assignment ( i, u->val1, regs_live_before,
+                                          u->regs_live_after );
 	 }
-         synth_mov_reg_offregmem ( 
-            u->size, 
-            u->val1, 
-            spillOrArchOffset( u->size, u->tag2, u->val2 ),
-            R_EBP
-         );
+         else {
+            synth_mov_reg_offregmem ( 
+               u->size, 
+               u->val1, 
+               spillOrArchOffset( u->size, u->tag2, u->val2 ),
+               R_EBP
+            );
+         }
          break;
       }
 
@@ -2436,7 +2198,6 @@
       case RCR:
       case RCL:
          vg_assert(u->tag2 == RealReg);
-         vg_assert(! readFlagUse ( u ));
          switch (u->tag1) {
             case Literal: synth_shiftop_lit_reg (
                              readFlagUse(u), writeFlagUse(u),
@@ -2515,55 +2276,16 @@
          synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 );
          break;
 
-      case TAG1:
-         synth_TAG1_op ( u->val3, u->val1 );
-         break;
-
-      case TAG2:
-         if (u->val3 != VgT_DebugFn) {
-            synth_TAG2_op ( u->val3, u->val1, u->val2 );
-         } else {
-            /* Assume a call to VgT_DebugFn passing both args
-               and placing the result back in the second. */
-            Int j, k;
-            /* u->val2 is the reg into which the result is written.  So
-               don't save/restore it.  And it can be used at a temp for
-               the call target, too.  Since %eax is used for the return
-               value from the C procedure, it is preserved only by
-               virtue of not being mentioned as a VG_CALLEE_SAVED reg. */
-            for (k = 0; k < VG_MAX_REALREGS; k++) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_pushv_reg ( 4, j );
-            }
-            emit_pushv_reg(4, u->val2);
-            emit_pushv_reg(4, u->val1);
-            emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 );
-            emit_call_reg ( u->val2 );
-            if (u->val2 != R_EAX)
-               emit_movv_reg_reg ( 4, R_EAX, u->val2 );
-            /* nuke args */
-            emit_add_lit_to_esp(8);
-            for (k = VG_MAX_REALREGS-1; k >= 0; k--) {
-               j = VG_(rankToRealRegNo) ( k );
-               if (VG_CALLEE_SAVED(j)) continue;
-               if (j == u->val2) continue;
-               emit_popv_reg ( 4, j );
-            }
-         }
-         break;
-
       case PUSH:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_pushv_reg ( 4, u->val1 );
+         VG_(emit_pushv_reg) ( 4, u->val1 );
          break;
 
       case POP:
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == NoValue);
-         emit_popv_reg ( 4, u->val1 );
+         VG_(emit_popv_reg) ( 4, u->val1 );
          break;
 
       case CALLM:
@@ -2572,35 +2294,34 @@
          vg_assert(u->size == 0);
          if (readFlagUse ( u )) 
             emit_get_eflags();
-         synth_call_baseBlock_method ( False, u->val1 );
+         VG_(synth_call) ( False, u->val1 );
          if (writeFlagUse ( u )) 
             emit_put_eflags();
          break;
 
-      case CCALL_1_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == NoValue);
+      case CCALL: {
+         /* Lazy: copy all three vals;  synth_ccall ignores any unnecessary
+            ones. */
+         UInt argv[]  = { u->val1, u->val2, u->val3 };
+         UInt tagv[]  = { RealReg, RealReg, RealReg };
+         UInt ret_reg = ( u->has_ret_val ? u->val3 : INVALID_REALREG );
+
+         if (u->argc >= 1)                   vg_assert(u->tag1 == RealReg);
+         else                                vg_assert(u->tag1 == NoValue);
+         if (u->argc >= 2)                   vg_assert(u->tag2 == RealReg);
+         else                                vg_assert(u->tag2 == NoValue);
+         if (u->argc == 3 || u->has_ret_val) vg_assert(u->tag3 == RealReg);
+         else                                vg_assert(u->tag3 == NoValue);
          vg_assert(u->size == 0);
 
-         synth_ccall_saveRegs();
-         synth_ccall_pushOneArg ( u->val1 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 4 );
+         VG_(synth_ccall) ( u->lit32, u->argc, u->regparms_n, argv, tagv,
+                            ret_reg, regs_live_before, u->regs_live_after );
          break;
-
-      case CCALL_2_0:
-         vg_assert(u->tag1 == RealReg);
-         vg_assert(u->tag2 == RealReg);
-         vg_assert(u->size == 0);
-
-         synth_ccall_saveRegs();
-         synth_ccall_pushTwoArgs ( u->val1, u->val2 );
-         synth_ccall_call_clearStack_restoreRegs ( u->lit32, 8 );
-         break;
-
+      }
       case CLEAR:
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == NoValue);
-         emit_add_lit_to_esp ( u->val1 );
+         VG_(emit_add_lit_to_esp) ( u->val1 );
          break;
 
       case CC2VAL:
@@ -2610,23 +2331,13 @@
          synth_setb_reg ( u->val1, u->cond );
          break;
 
-      /* We assume that writes to memory done by FPU_Ws are not going
-         to be used to create new code, so there's no orig-code-write
-         checks done by default. */
       case FPU_R: 
       case FPU_W:         
          vg_assert(u->tag1 == Lit16);
          vg_assert(u->tag2 == RealReg);
-         if (VG_(clo_instrument))
-            synth_fpu_mem_check_actions ( 
-               u->opcode==FPU_W, u->size, u->val2 );
          synth_fpu_regmem ( (u->val1 >> 8) & 0xFF,
                             u->val1 & 0xFF,
                             u->val2 );
-         /* No longer possible, but retained for illustrative purposes.
-         if (u->opcode == FPU_W && u->smc_check) 
-            synth_orig_code_write_check ( u->size, u->val2 );
-         */
          break;
 
       case FPU:
@@ -2641,11 +2352,22 @@
          break;
 
       default: 
-         VG_(printf)("emitUInstr: unhandled insn:\n");
-         VG_(ppUInstr)(0,u);
-         VG_(panic)("emitUInstr: unimplemented opcode");
+         if (VG_(needs).extended_UCode)
+            SK_(emitExtUInstr)(u, regs_live_before);
+         else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled opcode: %u.  Perhaps "
+                        " VG_(needs).extended_UCode should be set?\n",
+                        u->opcode);
+            VG_(ppUInstr)(0,u);
+            VG_(panic)("emitUInstr: unimplemented opcode");
+         }
    }
 
+   /* Update UInstr histogram */
+   vg_assert(u->opcode < 100);
+   histogram[u->opcode].counts++;
+   histogram[u->opcode].size += (emitted_code_used - old_emitted_code_used);
 }
 
 
@@ -2654,67 +2376,39 @@
 UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes )
 {
    Int i;
+   UChar regs_live_before = 0;   /* No regs live at BB start */
+   
    emitted_code_used = 0;
    emitted_code_size = 500; /* reasonable initial size */
-   emitted_code = VG_(jitmalloc)(emitted_code_size);
+   emitted_code = VG_(arena_malloc)(VG_AR_JITTER, emitted_code_size);
 
-   if (dis) VG_(printf)("Generated code:\n");
+   if (dis) VG_(printf)("Generated x86 code:\n");
 
    for (i = 0; i < cb->used; i++) {
+      UInstr* u = &cb->instrs[i];
       if (cb->instrs[i].opcode != NOP) {
-         UInstr* u = &cb->instrs[i];
-#        if 1
+
          /* Check on the sanity of this insn. */
-         Bool sane = VG_(saneUInstr)( False, u );
+         Bool sane = VG_(saneUInstr)( False, False, u );
          if (!sane) {
             VG_(printf)("\ninsane instruction\n");
-            VG_(ppUInstr)( i, u );
+            VG_(upUInstr)( i, u );
 	 }
          vg_assert(sane);
-#        endif
-#        if 0
-         /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking.
-            Requires a suitable definition of vg_DebugFn. */
-	 if (u->opcode == TAG1) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            emitUInstr( i, &t1 );
-	 }
-	 if (u->opcode == TAG2) {
-            UInstr t1;
-            vg_assert(u->tag1 == RealReg);
-            vg_assert(u->tag2 == RealReg);
-            VG_(emptyUInstr)( &t1 );
-            t1.opcode = TAG2;
-            t1.tag1 = t1.tag2 = RealReg;
-            t1.val1 = t1.val2 = u->val1;
-            t1.tag3 = Lit16;
-            t1.val3 = VgT_DebugFn;
-            if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 
-                || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 
-                || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4)
-               emitUInstr( i, &t1 );
-            t1.val1 = t1.val2 = u->val2;
-            emitUInstr( i, &t1 );
-	 }
-#        endif
-         emitUInstr( i, u );
+         emitUInstr( cb, i, regs_live_before );
       }
+      regs_live_before = u->regs_live_after;
    }
+   if (dis) VG_(printf)("\n");
 
    /* Returns a pointer to the emitted code.  This will have to be
-      copied by the caller into the translation cache, and then freed
-      using VG_(jitfree). */
+      copied by the caller into the translation cache, and then freed */
    *nbytes = emitted_code_used;
    return emitted_code;
 }
 
+#undef dis
+
 /*--------------------------------------------------------------------*/
 /*--- end                                          vg_from_ucode.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/vg_helpers.S b/vg_helpers.S
index 8262737..2315da4 100644
--- a/vg_helpers.S
+++ b/vg_helpers.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
@@ -86,36 +86,6 @@
    and the incoming parameters can be modified, to return results.
 */
 
-
-.global VG_(helper_value_check0_fail)
-VG_(helper_value_check0_fail):
-	pushal
-	call	VG_(helperc_value_check0_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check1_fail)
-VG_(helper_value_check1_fail):
-	pushal
-	call	VG_(helperc_value_check1_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check2_fail)
-VG_(helper_value_check2_fail):
-	pushal
-	call	VG_(helperc_value_check2_fail)
-	popal
-	ret
-
-.global VG_(helper_value_check4_fail)
-VG_(helper_value_check4_fail):
-	pushal
-	call	VG_(helperc_value_check4_fail)
-	popal
-	ret
-
-
 /* Fetch the time-stamp-ctr reg.
    On entry:
 	dummy, replaced by %EAX value
diff --git a/vg_include.h b/vg_include.h
index 74e1016..edf7aef 100644
--- a/vg_include.h
+++ b/vg_include.h
@@ -1,6 +1,6 @@
 
 /*--------------------------------------------------------------------*/
-/*--- A header file for all parts of Valgrind.                     ---*/
+/*--- A header file for all private parts of Valgrind's core.      ---*/
 /*--- Include no other!                                            ---*/
 /*---                                                 vg_include.h ---*/
 /*--------------------------------------------------------------------*/
@@ -27,17 +27,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_INCLUDE_H
 #define __VG_INCLUDE_H
 
-
-#include <stdarg.h>       /* ANSI varargs stuff  */
-#include <setjmp.h>       /* for jmp_buf         */
-
-
 /* ---------------------------------------------------------------------
    Where to send bug reports to.
    ------------------------------------------------------------------ */
@@ -52,21 +47,9 @@
 
 #include "vg_constants.h"
 
-
-/* Set to 1 to enable time profiling.  Since this uses SIGPROF, we
-   don't want this permanently enabled -- only for profiling
-   builds. */
-#if 0
-#  define VG_PROFILE
-#endif
-
-
-/* Total number of integer registers available for allocation.  That's
-   all of them except %esp, %edi and %ebp.  %edi is a general spare
-   temporary.  %ebp permanently points at VG_(baseBlock).  Note that
-   it's important that this tie in with what rankToRealRegNo() says.
-   DO NOT CHANGE THIS VALUE FROM 5. !  */
-#define VG_MAX_REALREGS 5
+/* All stuff visible to core and skins goes in vg_skin.h.  Things visible
+ * to core but private to skins go here. */
+#include "vg_skin.h"
 
 /* Total number of spill slots available for allocation, if a TempReg
    doesn't make it into a RealReg.  Just bomb the entire system if
@@ -111,10 +94,6 @@
    errors at all.  Counterpart to M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN. */
 #define M_VG_COLLECT_NO_ERRORS_AFTER_FOUND 30000
 
-/* These many bytes below %ESP are considered addressible if we're
-   doing the --workaround-gcc296-bugs hack. */
-#define VG_GCC296_BUG_STACK_SLOP 1024
-
 /* The maximum number of calls we're prepared to save in a
    backtrace. */
 #define VG_DEEPEST_BACKTRACE 50
@@ -132,17 +111,6 @@
    give finer interleaving but much increased scheduling overheads. */
 #define VG_SCHEDULING_QUANTUM   50000
 
-/* The maximum number of pthreads that we support.  This is
-   deliberately not very high since our implementation of some of the
-   scheduler algorithms is surely O(N) in the number of threads, since
-   that's simple, at least.  And (in practice) we hope that most
-   programs do not need many threads. */
-#define VG_N_THREADS 50
-
-/* Maximum number of pthread keys available.  Again, we start low until
-   the need for a higher number presents itself. */
-#define VG_N_THREAD_KEYS 50
-
 /* Number of file descriptors that can simultaneously be waited on for
    I/O to complete.  Perhaps this should be the same as VG_N_THREADS
    (surely a thread can't wait on more than one fd at once?.  Who
@@ -165,97 +133,43 @@
 /* Number of entries in each thread's fork-handler stack. */
 #define VG_N_FORKHANDLERSTACK 2
 
+/* Max number of callers for context in a suppression. */
+#define VG_N_SUPP_CALLERS  4
+   
 
 /* ---------------------------------------------------------------------
    Basic types
    ------------------------------------------------------------------ */
 
-typedef unsigned char          UChar;
-typedef unsigned short         UShort;
-typedef unsigned int           UInt;
-typedef unsigned long long int ULong;
-
-typedef signed char          Char;
-typedef signed short         Short;
-typedef signed int           Int;
-typedef signed long long int Long;
-
-typedef unsigned int Addr;
-
-typedef unsigned char Bool;
-#define False ((Bool)0)
-#define True ((Bool)1)
-
-#define mycat_wrk(aaa,bbb) aaa##bbb
-#define mycat(aaa,bbb) mycat_wrk(aaa,bbb)
-
 /* Just pray that gcc's constant folding works properly ... */
 #define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0)               \
    ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4)  \
      | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0))
 
-/* For cache simulation */
-typedef struct { 
-    int size;       /* bytes */
-    int assoc;
-    int line_size;  /* bytes */
-} cache_t;
-
-#define UNDEFINED_CACHE     ((cache_t) { -1, -1, -1 })
-
-/* ---------------------------------------------------------------------
-   Now the basic types are set up, we can haul in the kernel-interface
-   definitions.
-   ------------------------------------------------------------------ */
-
-#include "./vg_kerneliface.h"
-
-
 /* ---------------------------------------------------------------------
    Command-line-settable options
    ------------------------------------------------------------------ */
 
-#define VG_CLO_SMC_NONE 0
-#define VG_CLO_SMC_SOME 1
-#define VG_CLO_SMC_ALL  2
-
 #define VG_CLO_MAX_SFILES 10
 
 /* Should we stop collecting errors if too many appear?  default: YES */
 extern Bool  VG_(clo_error_limit);
-/* Shall we V-check addrs (they are always A checked too): default: YES */
-extern Bool  VG_(clo_check_addrVs);
 /* Enquire about whether to attach to GDB at errors?   default: NO */
 extern Bool  VG_(clo_GDB_attach);
 /* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */
 extern Int   VG_(sanity_level);
-/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */
-extern Int   VG_(clo_verbosity);
 /* Automatically attempt to demangle C++ names?  default: YES */
 extern Bool  VG_(clo_demangle);
-/* Do leak check at exit?  default: NO */
-extern Bool  VG_(clo_leak_check);
-/* In leak check, show reachable-but-not-freed blocks?  default: NO */
-extern Bool  VG_(clo_show_reachable);
-/* How closely should we compare ExeContexts in leak records? default: 2 */
-extern Int   VG_(clo_leak_resolution);
 /* Round malloc sizes upwards to integral number of words? default:
    NO */
 extern Bool  VG_(clo_sloppy_malloc);
 /* Minimum alignment in functions that don't specify alignment explicitly.
    default: 0, i.e. use default of the machine (== 4) */
 extern Int   VG_(clo_alignment);
-/* Allow loads from partially-valid addresses?  default: YES */
-extern Bool  VG_(clo_partial_loads_ok);
 /* Simulate child processes? default: NO */
 extern Bool  VG_(clo_trace_children);
 /* The file id on which we send all messages.  default: 2 (stderr). */
 extern Int   VG_(clo_logfile_fd);
-/* Max volume of the freed blocks queue. */
-extern Int   VG_(clo_freelist_vol);
-/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
-   default: NO */
-extern Bool  VG_(clo_workaround_gcc296_bugs);
 
 /* The number of suppression files specified. */
 extern Int   VG_(clo_n_suppressions);
@@ -266,20 +180,8 @@
 extern Bool  VG_(clo_single_step);
 /* Code improvement?  default: YES */
 extern Bool  VG_(clo_optimise);
-/* Memory-check instrumentation?  default: YES */
-extern Bool  VG_(clo_instrument);
-/* DEBUG: clean up instrumented code?  default: YES */
-extern Bool  VG_(clo_cleanup);
-/* Cache simulation instrumentation?  default: NO */
-extern Bool  VG_(clo_cachesim);
-/* I1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_I1_cache);
-/* D1 cache configuration.  default: undefined */
-extern cache_t VG_(clo_D1_cache);
-/* L2 cache configuration.  default: undefined */
-extern cache_t VG_(clo_L2_cache);
-/* SMC write checks?  default: SOME (1,2,4 byte movs to mem) */
-extern Int   VG_(clo_smc_check);
+/* DEBUG: print generated code?  default: 00000 ( == NO ) */
+extern Bool  VG_(clo_trace_codegen);
 /* DEBUG: print system calls?  default: NO */
 extern Bool  VG_(clo_trace_syscalls);
 /* DEBUG: print signal details?  default: NO */
@@ -308,78 +210,35 @@
    Debugging and profiling stuff
    ------------------------------------------------------------------ */
 
+/* Change to 1 to get more accurate but more expensive core profiling. */
+#if 0
+#  define VGP_ACCURATE_PROFILING
+#endif
+
 /* No, really.  I _am_ that strange. */
 #define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn)
 
-/* Tools for building messages from multiple parts. */
-typedef
-   enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg }
-   VgMsgKind;
-
-extern void VG_(start_msg)  ( VgMsgKind kind );
-extern void VG_(add_to_msg) ( Char* format, ... );
-extern void VG_(end_msg)    ( void );
-
-/* Send a simple, single-part message. */
-extern void VG_(message)    ( VgMsgKind kind, Char* format, ... );
-
 /* Create a logfile into which messages can be dumped. */
 extern void VG_(startup_logging) ( void );
-extern void VG_(shutdown_logging) ( void );
-
-
-/* Profiling stuff */
-#ifdef VG_PROFILE
-
-#define VGP_M_STACK 10
-
-#define VGP_M_CCS 26  /* == the # of elems in VGP_LIST */
-#define VGP_LIST \
-   VGP_PAIR(VgpUnc=0,      "unclassified"),           \
-   VGP_PAIR(VgpRun,        "running"),                \
-   VGP_PAIR(VgpSched,      "scheduler"),              \
-   VGP_PAIR(VgpMalloc,     "low-lev malloc/free"),    \
-   VGP_PAIR(VgpCliMalloc,  "client  malloc/free"),    \
-   VGP_PAIR(VgpTranslate,  "translate-main"),         \
-   VGP_PAIR(VgpToUCode,    "to-ucode"),               \
-   VGP_PAIR(VgpFromUcode,  "from-ucode"),             \
-   VGP_PAIR(VgpImprove,    "improve"),                \
-   VGP_PAIR(VgpInstrument, "instrument"),             \
-   VGP_PAIR(VgpCleanup,    "cleanup"),                \
-   VGP_PAIR(VgpRegAlloc,   "reg-alloc"),              \
-   VGP_PAIR(VgpDoLRU,      "do-lru"),                 \
-   VGP_PAIR(VgpSlowFindT,  "slow-search-transtab"),   \
-   VGP_PAIR(VgpInitAudit,  "init-mem-audit"),         \
-   VGP_PAIR(VgpExeContext, "exe-context"),            \
-   VGP_PAIR(VgpReadSyms,   "read-syms"),              \
-   VGP_PAIR(VgpAddToT,     "add-to-transtab"),        \
-   VGP_PAIR(VgpSARP,       "set-addr-range-perms"),   \
-   VGP_PAIR(VgpSyscall,    "syscall wrapper"),        \
-   VGP_PAIR(VgpCacheInstrument, "cache instrument"),  \
-   VGP_PAIR(VgpCacheGetBBCC,"cache get BBCC"),        \
-   VGP_PAIR(VgpCacheSimulate, "cache simulate"),      \
-   VGP_PAIR(VgpCacheDump,  "cache stats dump"),       \
-   VGP_PAIR(VgpSpare1,     "spare 1"),                \
-   VGP_PAIR(VgpSpare2,     "spare 2")
-
-#define VGP_PAIR(enumname,str) enumname
-typedef enum { VGP_LIST } VgpCC;
-#undef VGP_PAIR
+extern void VG_(shutdown_logging)( void );
 
 extern void VGP_(init_profiling) ( void );
 extern void VGP_(done_profiling) ( void );
-extern void VGP_(pushcc) ( VgpCC );
-extern void VGP_(popcc) ( void );
 
-#define VGP_PUSHCC(cc) VGP_(pushcc)(cc)
-#define VGP_POPCC      VGP_(popcc)()
+#undef  VGP_PUSHCC
+#undef  VGP_POPCC
+#define VGP_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#define VGP_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 
+/* Use this for ones that happen a lot and thus we don't want to put in
+   all the time, eg. for %esp assignment. */
+#ifdef VGP_ACCURATE_PROFILING
+#  define VGP_MAYBE_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#  define VGP_MAYBE_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 #else
-
-#define VGP_PUSHCC(cc) /* */
-#define VGP_POPCC      /* */
-
-#endif /* VG_PROFILE */
+#  define VGP_MAYBE_PUSHCC(x)
+#  define VGP_MAYBE_POPCC(x)
+#endif
 
 
 /* ---------------------------------------------------------------------
@@ -387,37 +246,40 @@
    ------------------------------------------------------------------ */
 
 /* Allocation arenas.  
+      CORE      is for the core's general use.
+      SKIN      is for the skin to use (and the only one it uses).
       SYMTAB    is for Valgrind's symbol table storage.
+      JITTER    is for small storage during translation.
       CLIENT    is for the client's mallocs/frees.
       DEMANGLE  is for the C++ demangler.
       EXECTXT   is for storing ExeContexts.
-      ERRCTXT   is for storing ErrContexts.
-      PRIVATE   is for Valgrind general stuff.
+      ERRORS    is for storing CoreErrors.
       TRANSIENT is for very short-term use.  It should be empty
                 in between uses.
-   When adding a new arena, remember also to add it
-   to ensure_mm_init(). 
+   When adding a new arena, remember also to add it to ensure_mm_init(). 
 */
 typedef Int ArenaId;
 
-#define VG_N_ARENAS 7
+#define VG_N_ARENAS 9
 
-#define VG_AR_PRIVATE   0    /* :: ArenaId */
-#define VG_AR_SYMTAB    1    /* :: ArenaId */
-#define VG_AR_CLIENT    2    /* :: ArenaId */
-#define VG_AR_DEMANGLE  3    /* :: ArenaId */
-#define VG_AR_EXECTXT   4    /* :: ArenaId */
-#define VG_AR_ERRCTXT   5    /* :: ArenaId */
-#define VG_AR_TRANSIENT 6    /* :: ArenaId */
+#define VG_AR_CORE      0    /* :: ArenaId */
+#define VG_AR_SKIN      1    /* :: ArenaId */
+#define VG_AR_SYMTAB    2    /* :: ArenaId */
+#define VG_AR_JITTER    3    /* :: ArenaId */
+#define VG_AR_CLIENT    4    /* :: ArenaId */
+#define VG_AR_DEMANGLE  5    /* :: ArenaId */
+#define VG_AR_EXECTXT   6    /* :: ArenaId */
+#define VG_AR_ERRORS    7    /* :: ArenaId */
+#define VG_AR_TRANSIENT 8    /* :: ArenaId */
 
-extern void* VG_(malloc)  ( ArenaId arena, Int nbytes );
-extern void  VG_(free)    ( ArenaId arena, void* ptr );
-extern void* VG_(calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
-extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size );
-extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, 
+extern void* VG_(arena_malloc)  ( ArenaId arena, Int nbytes );
+extern void  VG_(arena_free)    ( ArenaId arena, void* ptr );
+extern void* VG_(arena_calloc)  ( ArenaId arena, Int nmemb, Int nbytes );
+extern void* VG_(arena_realloc) ( ArenaId arena, void* ptr, Int alignment,
+                                  Int size );
+extern void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, 
                                                 Int req_pszB );
 
-extern void  VG_(mallocSanityCheckArena) ( ArenaId arena );
 extern void  VG_(mallocSanityCheckAll)   ( void );
 
 extern void  VG_(show_all_arena_stats) ( void );
@@ -433,13 +295,13 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_clientfuns.c
+   Exports of vg_clientfuncs.c
    ------------------------------------------------------------------ */
 
 /* This doesn't export code or data that valgrind.so needs to link
    against.  However, the scheduler does need to know the following
    request codes.  A few, publically-visible, request codes are also
-   defined in valgrind.h. */
+   defined in valgrind.h, and similar headers for some skins. */
 
 #define VG_USERREQ__MALLOC              0x2001
 #define VG_USERREQ__BUILTIN_NEW         0x2002
@@ -552,16 +414,6 @@
    Exports of vg_scheduler.c
    ------------------------------------------------------------------ */
 
-/* ThreadIds are simply indices into the vg_threads[] array. */
-typedef 
-   UInt 
-   ThreadId;
-
-/* Special magic value for an invalid ThreadId.  It corresponds to
-   LinuxThreads using zero as the initial value for
-   pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */
-#define VG_INVALID_THREADID ((ThreadId)(0))
-
 typedef
    enum { 
       VgTs_Empty,      /* this slot is not in use */
@@ -594,140 +446,138 @@
    ForkHandlerEntry;
 
 
-typedef
-   struct {
-      /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
-         The thread identity is simply the index in vg_threads[].
-         ThreadId == 1 is the root thread and has the special property
-         that we don't try and allocate or deallocate its stack.  For
-         convenience of generating error message, we also put the
-         ThreadId in this tid field, but be aware that it should
-         ALWAYS == the index in vg_threads[]. */
-      ThreadId tid;
+struct _ThreadState {
+   /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED.
+      The thread identity is simply the index in vg_threads[].
+      ThreadId == 1 is the root thread and has the special property
+      that we don't try and allocate or deallocate its stack.  For
+      convenience of generating error message, we also put the
+      ThreadId in this tid field, but be aware that it should
+      ALWAYS == the index in vg_threads[]. */
+   ThreadId tid;
 
-      /* Current scheduling status. 
+   /* Current scheduling status. 
 
-         Complications: whenever this is set to VgTs_WaitMX, you
-         should also set .m_edx to whatever the required return value
-         is for pthread_mutex_lock / pthread_cond_timedwait for when
-         the mutex finally gets unblocked. */
-      ThreadStatus status;
+      Complications: whenever this is set to VgTs_WaitMX, you
+      should also set .m_edx to whatever the required return value
+      is for pthread_mutex_lock / pthread_cond_timedwait for when
+      the mutex finally gets unblocked. */
+   ThreadStatus status;
 
-      /* When .status == WaitMX, points to the mutex I am waiting for.
-         When .status == WaitCV, points to the mutex associated with
-         the condition variable indicated by the .associated_cv field.
-         In all other cases, should be NULL. */
-      void* /* pthread_mutex_t* */ associated_mx;
+   /* When .status == WaitMX, points to the mutex I am waiting for.
+      When .status == WaitCV, points to the mutex associated with
+      the condition variable indicated by the .associated_cv field.
+      In all other cases, should be NULL. */
+   void* /*pthread_mutex_t* */ associated_mx;
 
-      /* When .status == WaitCV, points to the condition variable I am
-         waiting for.  In all other cases, should be NULL. */
-      void* /* pthread_cond_t* */ associated_cv;
+   /* When .status == WaitCV, points to the condition variable I am
+      waiting for.  In all other cases, should be NULL. */
+   void* /*pthread_cond_t* */ associated_cv;
 
-      /* If VgTs_Sleeping, this is when we should wake up, measured in
-         milliseconds as supplied by VG_(read_millisecond_counter). 
- 
-         If VgTs_WaitCV, this indicates the time at which
-         pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
-         this means infinitely far in the future, viz,
-         pthread_cond_wait. */
-      UInt awaken_at;
+   /* If VgTs_Sleeping, this is when we should wake up, measured in
+      milliseconds as supplied by VG_(read_millisecond_counter). 
 
-      /* If VgTs_WaitJoiner, return value, as generated by joinees. */
-      void* joinee_retval;
+      If VgTs_WaitCV, this indicates the time at which
+      pthread_cond_timedwait should wake up.  If == 0xFFFFFFFF,
+      this means infinitely far in the future, viz,
+      pthread_cond_wait. */
+   UInt awaken_at;
 
-      /* If VgTs_WaitJoinee, place to copy the return value to, and
-         the identity of the thread we're waiting for. */
-      void**   joiner_thread_return;
-      ThreadId joiner_jee_tid;      
+   /* If VgTs_WaitJoiner, return value, as generated by joinees. */
+   void* joinee_retval;
 
-      /* Whether or not detached. */
-      Bool detached;
+   /* If VgTs_WaitJoinee, place to copy the return value to, and
+      the identity of the thread we're waiting for. */
+   void**   joiner_thread_return;
+   ThreadId joiner_jee_tid;      
 
-      /* Cancelability state and type. */
-      Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
-      Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
-     
-      /* Pointer to fn to call to do cancellation.  Indicates whether
-         or not cancellation is pending.  If NULL, not pending.  Else
-         should be &thread_exit_wrapper(), indicating that
-         cancallation is pending. */
-      void (*cancel_pend)(void*);
+   /* Whether or not detached. */
+   Bool detached;
 
-      /* The cleanup stack. */
-      Int          custack_used;
-      CleanupEntry custack[VG_N_CLEANUPSTACK];
+   /* Cancelability state and type. */
+   Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */
+   Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */
+  
+   /* Pointer to fn to call to do cancellation.  Indicates whether
+      or not cancellation is pending.  If NULL, not pending.  Else
+      should be &thread_exit_wrapper(), indicating that
+      cancallation is pending. */
+   void (*cancel_pend)(void*);
 
-      /* thread-specific data */
-      void* specifics[VG_N_THREAD_KEYS];
+   /* The cleanup stack. */
+   Int          custack_used;
+   CleanupEntry custack[VG_N_CLEANUPSTACK];
 
-      /* This thread's blocked-signals mask.  Semantics is that for a
-         signal to be delivered to this thread, the signal must not be
-         blocked by either the process-wide signal mask nor by this
-         one.  So, if this thread is prepared to handle any signal that
-         the process as a whole is prepared to handle, this mask should
-         be made empty -- and that it is its default, starting
-         state. */
-      vki_ksigset_t sig_mask;
+   /* thread-specific data */
+   void* specifics[VG_N_THREAD_KEYS];
 
-      /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
-         is the set of signals for which we are sigwait()ing. */
-      vki_ksigset_t sigs_waited_for;
+   /* This thread's blocked-signals mask.  Semantics is that for a
+      signal to be delivered to this thread, the signal must not be
+      blocked by either the process-wide signal mask nor by this
+      one.  So, if this thread is prepared to handle any signal that
+      the process as a whole is prepared to handle, this mask should
+      be made empty -- and that it is its default, starting
+      state. */
+   vki_ksigset_t sig_mask;
 
-      /* Counts the number of times a signal handler for this thread
-         has returned.  This makes it easy to implement pause(), by
-         polling this value, of course interspersed with nanosleeps,
-         and waiting till it changes. */
-      UInt n_signals_returned;
+   /* When not VgTs_WaitSIG, has no meaning.  When VgTs_WaitSIG,
+      is the set of signals for which we are sigwait()ing. */
+   vki_ksigset_t sigs_waited_for;
 
-      /* Stacks.  When a thread slot is freed, we don't deallocate its
-         stack; we just leave it lying around for the next use of the
-         slot.  If the next use of the slot requires a larger stack,
-         only then is the old one deallocated and a new one
-         allocated. 
- 
-         For the main thread (threadid == 0), this mechanism doesn't
-         apply.  We don't know the size of the stack since we didn't
-         allocate it, and furthermore we never reallocate it. */
+   /* Counts the number of times a signal handler for this thread
+      has returned.  This makes it easy to implement pause(), by
+      polling this value, of course interspersed with nanosleeps,
+      and waiting till it changes. */
+   UInt n_signals_returned;
 
-      /* The allocated size of this thread's stack (permanently zero
-         if this is ThreadId == 0, since we didn't allocate its stack) */
-      UInt stack_size;
+   /* Stacks.  When a thread slot is freed, we don't deallocate its
+      stack; we just leave it lying around for the next use of the
+      slot.  If the next use of the slot requires a larger stack,
+      only then is the old one deallocated and a new one
+      allocated. 
 
-      /* Address of the lowest word in this thread's stack.  NULL means
-         not allocated yet.
-      */
-      Addr stack_base;
+      For the main thread (threadid == 0), this mechanism doesn't
+      apply.  We don't know the size of the stack since we didn't
+      allocate it, and furthermore we never reallocate it. */
 
-     /* Address of the highest legitimate word in this stack.  This is
-        used for error messages only -- not critical for execution
-        correctness.  Is is set for all stacks, specifically including
-        ThreadId == 0 (the main thread). */
-      Addr stack_highest_word;
+   /* The allocated size of this thread's stack (permanently zero
+      if this is ThreadId == 0, since we didn't allocate its stack) */
+   UInt stack_size;
 
-      /* Saved machine context. */
-      UInt m_eax;
-      UInt m_ebx;
-      UInt m_ecx;
-      UInt m_edx;
-      UInt m_esi;
-      UInt m_edi;
-      UInt m_ebp;
-      UInt m_esp;
-      UInt m_eflags;
-      UInt m_eip;
-      UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+   /* Address of the lowest word in this thread's stack.  NULL means
+      not allocated yet.
+   */
+   Addr stack_base;
 
-      UInt sh_eax;
-      UInt sh_ebx;
-      UInt sh_ecx;
-      UInt sh_edx;
-      UInt sh_esi;
-      UInt sh_edi;
-      UInt sh_ebp;
-      UInt sh_esp;
-      UInt sh_eflags;
-   }
-   ThreadState;
+  /* Address of the highest legitimate word in this stack.  This is
+     used for error messages only -- not critical for execution
+     correctness.  Is is set for all stacks, specifically including
+     ThreadId == 0 (the main thread). */
+   Addr stack_highest_word;
+
+   /* Saved machine context. */
+   UInt m_eax;
+   UInt m_ebx;
+   UInt m_ecx;
+   UInt m_edx;
+   UInt m_esi;
+   UInt m_edi;
+   UInt m_ebp;
+   UInt m_esp;
+   UInt m_eflags;
+   UInt m_eip;
+   UInt m_fpu[VG_SIZE_OF_FPUSTATE_W];
+
+   UInt sh_eax;
+   UInt sh_ebx;
+   UInt sh_ecx;
+   UInt sh_edx;
+   UInt sh_esi;
+   UInt sh_edi;
+   UInt sh_ebp;
+   UInt sh_esp;
+   UInt sh_eflags;
+};
 
 
 /* The thread table. */
@@ -753,10 +603,6 @@
 /* Similarly ... */
 extern ThreadId VG_(get_current_tid) ( void );
 
-/* Which thread is this address in the stack of, if any?  Used for
-   error message generation. */
-extern ThreadId VG_(identify_stack_addr)( Addr a );
-
 /* Nuke all threads except tid. */
 extern void VG_(nuke_all_threads_except) ( ThreadId me );
 
@@ -795,12 +641,14 @@
    the initial stack, which we can't move, is allocated here.
    VG_(scheduler_init) checks this.  Andrea Archelangi's 2.4 kernels
    have been rumoured to start stacks at 0x80000000, so that too is
-   considered. It seems systems with longer uptimes tend to to use
-   stacks which start at 0x40000000 sometimes.  
-*/
+   considered.  It seems systems with longer uptimes tend to to use
+   stacks which start at 0x40000000 sometimes.  JRS 2002-Aug-21: I
+   also have reports of stacks starting at 0xE0000000.*/
+
 #define VG_STARTUP_STACK_BASE_1  (Addr)0xC0000000
 #define VG_STARTUP_STACK_BASE_2  (Addr)0x80000000
 #define VG_STARTUP_STACK_BASE_3  (Addr)0x40000000
+#define VG_STARTUP_STACK_BASE_4  (Addr)0xE0000000
 #define VG_STARTUP_STACK_SMALLERTHAN  0x100000 /* 1024k */
 
 #define VG_STACK_MATCHES_BASE(zzstack, zzbase)                 \
@@ -819,17 +667,24 @@
 #define VG_AR_CLIENT_STACKBASE_REDZONE_SZB \
    (VG_AR_CLIENT_STACKBASE_REDZONE_SZW * VKI_BYTES_PER_WORD)
 
+/* Junk to fill up a thread's shadow regs with when shadow regs aren't
+ * being used. */
+#define VG_UNUSED_SHADOW_REG_VALUE  0x27182818
+
+/* What we set a shadow register to when written by SET_EAX and similar
+ * things. */
+extern UInt VG_(written_shadow_reg);
 
 /* Write a value to the client's %EDX (request return value register)
    and set the shadow to indicate it is defined. */
-#define SET_EDX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_edx = (zzval);             \
-        VG_(threads)[zztid].sh_edx = VGM_WORD_VALID;     \
+#define SET_EDX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_edx = (zzval);                   \
+        VG_(threads)[zztid].sh_edx = VG_(written_shadow_reg);  \
    } while (0)
 
-#define SET_EAX(zztid, zzval)                          \
-   do { VG_(threads)[zztid].m_eax = (zzval);             \
-        VG_(threads)[zztid].sh_eax = VGM_WORD_VALID;     \
+#define SET_EAX(zztid, zzval)                                  \
+   do { VG_(threads)[zztid].m_eax = (zzval);                   \
+        VG_(threads)[zztid].sh_eax = VG_(written_shadow_reg);  \
    } while (0)
 
 
@@ -875,87 +730,21 @@
    Exports of vg_mylibc.c
    ------------------------------------------------------------------ */
 
+__attribute__((noreturn))
+extern void VG_(skin_error) ( Char* s );
 
-#if !defined(NULL)
-#  define NULL ((void*)0)
-#endif
+/* VG_(brk) not public so skins cannot screw with curr_dataseg_end */
+extern void* VG_(brk) ( void* end_data_segment );
 
-extern void VG_(exit)( Int status )
-            __attribute__ ((__noreturn__));
+/* Skins use VG_(strdup)() which doesn't expose ArenaId */
+extern Char* VG_(arena_strdup) ( ArenaId aid, const Char* s);
 
-extern void VG_(printf) ( const char *format, ... );
-/* too noisy ...  __attribute__ ((format (printf, 1, 2))) ; */
-
-extern void VG_(sprintf) ( Char* buf, Char *format, ... );
-
-extern void VG_(vprintf) ( void(*send)(Char), 
-                          const Char *format, va_list vargs );
-
-extern Bool VG_(isspace) ( Char c );
-extern Bool VG_(isdigit) ( Char c );
-
-extern Int VG_(strlen) ( const Char* str );
-
-extern Long VG_(atoll) ( Char* str );
-extern Long VG_(atoll36) ( Char* str );
-
-extern Char* VG_(strcat) ( Char* dest, const Char* src );
-extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n );
-extern Char* VG_(strpbrk) ( const Char* s, const Char* accept );
-
-extern Char* VG_(strcpy) ( Char* dest, const Char* src );
-
-extern Int VG_(strcmp)    ( const Char* s1, const Char* s2 );
-extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 );
-
-extern Int VG_(strncmp)    ( const Char* s1, const Char* s2, Int nmax );
-extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax );
-
-extern Char* VG_(strstr) ( const Char* haystack, Char* needle );
-extern Char* VG_(strchr) ( const Char* s, Char c );
-extern Char* VG_(strdup) ( ArenaId aid, const Char* s);
-
-extern Char* VG_(getenv) ( Char* name );
-extern Int   VG_(getpid) ( void );
-
+/* Skins shouldn't need these...(?) */
 extern void VG_(start_rdtsc_calibration) ( void );
 extern void VG_(end_rdtsc_calibration) ( void );
 extern UInt VG_(read_millisecond_timer) ( void );
 
-
-extern Char VG_(toupper) ( Char c );
-
-extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest );
-
-extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest );
-
-extern Bool VG_(stringMatch) ( Char* pat, Char* str );
-
-
-#define VG__STRING(__str)  #__str
-
-/* Asserts are permanently enabled.  Hurrah! */
-#define vg_assert(expr)                                               \
-  ((void) ((expr) ? 0 :						      \
-	   (VG_(assert_fail) (VG__STRING(expr),			      \
-			      __FILE__, __LINE__,                     \
-                              __PRETTY_FUNCTION__), 0)))
-
-extern void VG_(assert_fail) ( Char* expr, Char* file, 
-                               Int line, Char* fn )
-            __attribute__ ((__noreturn__));
-
-/* Reading and writing files. */
-extern Int  VG_(open_read) ( Char* pathname );
-extern Int  VG_(open_write)       ( Char* pathname );
-extern Int  VG_(create_and_write) ( Char* pathname );
-extern void VG_(close)     ( Int fd );
-extern Int  VG_(read)      ( Int fd, void* buf, Int count);
-extern Int  VG_(write)     ( Int fd, void* buf, Int count);
-extern Int  VG_(stat) ( Char* file_name, struct vki_stat* buf );
-
-extern Int  VG_(fcntl) ( Int fd, Int cmd, Int arg );
-
+extern Int VG_(fcntl) ( Int fd, Int cmd, Int arg );
 extern Int VG_(select)( Int n, 
                         vki_fd_set* readfds, 
                         vki_fd_set* writefds, 
@@ -964,306 +753,37 @@
 extern Int VG_(nanosleep)( const struct vki_timespec *req, 
                            struct vki_timespec *rem );
 
-
-/* mmap-ery ... */
-extern void* VG_(mmap)( void* start, UInt length, 
-                        UInt prot, UInt flags, UInt fd, UInt offset );
-
-extern Int  VG_(munmap)( void* start, Int length );
-
-extern void* VG_(brk) ( void* end_data_segment );
-
-
-/* Print a (panic) message, and abort. */
-extern void VG_(panic) ( Char* str )
-            __attribute__ ((__noreturn__));
-
-/* Get memory by anonymous mmap. */
-extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
-
-/* Crude stand-in for the glibc system() call. */
-extern Int VG_(system) ( Char* cmd );
-
-
-/* Signal stuff.  Note that these use the vk_ (kernel) structure
-   definitions, which are different in places from those that glibc
-   defines.  Since we're operating right at the kernel interface,
-   glibc's view of the world is entirely irrelevant. */
-
-/* --- Signal set ops --- */
-extern Int  VG_(ksigfillset)( vki_ksigset_t* set );
-extern Int  VG_(ksigemptyset)( vki_ksigset_t* set );
-
-extern Bool VG_(kisfullsigset)( vki_ksigset_t* set );
-extern Bool VG_(kisemptysigset)( vki_ksigset_t* set );
-
-extern Int  VG_(ksigaddset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigdelset)( vki_ksigset_t* set, Int signum );
-extern Int  VG_(ksigismember) ( vki_ksigset_t* set, Int signum );
-
-extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, 
-                                      vki_ksigset_t* src );
-
-/* --- Mess with the kernel's sig state --- */
-extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, 
-                                       vki_ksigset_t* oldset );
-extern Int VG_(ksigaction) ( Int signum,  
-                             const vki_ksigaction* act,  
-                             vki_ksigaction* oldact );
-
-extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int));
-
-extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss );
-
-extern Int VG_(kill)( Int pid, Int signo );
-extern Int VG_(sigpending) ( vki_ksigset_t* set );
-
-
 /* ---------------------------------------------------------------------
    Definitions for the JITter (vg_translate.c, vg_to_ucode.c,
    vg_from_ucode.c).
    ------------------------------------------------------------------ */
 
-/* Tags which describe what operands are. */
-typedef
-   enum { TempReg=0, ArchReg=1, RealReg=2, 
-          SpillNo=3, Literal=4, Lit16=5, 
-          NoValue=6 }
-   Tag;
-
-
-/* Microinstruction opcodes. */
-typedef
-   enum {
-      NOP,
-      GET,
-      PUT,
-      LOAD,
-      STORE,
-      MOV,
-      CMOV, /* Used for cmpxchg and cmov */
-      WIDEN,
-      JMP,
-
-      /* Read/write the %EFLAGS register into a TempReg. */
-      GETF, PUTF,
-
-      ADD, ADC, AND, OR,  XOR, SUB, SBB,
-      SHL, SHR, SAR, ROL, ROR, RCL, RCR,
-      NOT, NEG, INC, DEC, BSWAP,
-      CC2VAL,
-
-      /* Not strictly needed, but useful for making better
-         translations of address calculations. */
-      LEA1,  /* reg2 := const + reg1 */
-      LEA2,  /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */
-
-      /* not for translating x86 calls -- only to call helpers */
-      CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences
-                           for CALLM. */
-      PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */
-      CALLM,  /* call to a machine-code helper */
-
-      /* for calling C functions -- CCALL_M_N passes M arguments and returns N
-       * (0 or 1) return values */
-      CCALL_1_0, CCALL_2_0,
-
-      /* Hack for translating string (REP-) insns.  Jump to literal if
-         TempReg/RealReg is zero. */
-      JIFZ,
-
-      /* FPU ops which read/write mem or don't touch mem at all. */
-      FPU_R,
-      FPU_W,
-      FPU,
-
-      /* Advance the simulated %eip by some small (< 128) number. */
-      INCEIP,
-
-      /* uinstrs which are not needed for mere translation of x86 code,
-         only for instrumentation of it. */
-      LOADV,
-      STOREV,
-      GETV,
-      PUTV,
-      TESTV,
-      SETV,
-      /* Get/set the v-bit (and it is only one bit) for the simulated
-         %eflags register. */
-      GETVF,
-      PUTVF,
-
-      /* Do a unary or binary tag op.  Only for post-instrumented
-         code.  For TAG1, first and only arg is a TempReg, and is both
-         arg and result reg.  For TAG2, first arg is src, second is
-         dst, in the normal way; both are TempRegs.  In both cases,
-         3rd arg is a RiCHelper with a Lit16 tag.  This indicates
-         which tag op to do. */
-      TAG1,
-      TAG2
-   }
-   Opcode;
-
-
-/* Condition codes, observing the Intel encoding.  CondAlways is an
-   extra. */
-typedef
-   enum {
-      CondO      = 0,  /* overflow           */
-      CondNO     = 1,  /* no overflow        */
-      CondB      = 2,  /* below              */
-      CondNB     = 3,  /* not below          */
-      CondZ      = 4,  /* zero               */
-      CondNZ     = 5,  /* not zero           */
-      CondBE     = 6,  /* below or equal     */
-      CondNBE    = 7,  /* not below or equal */
-      CondS      = 8,  /* negative           */
-      ConsNS     = 9,  /* not negative       */
-      CondP      = 10, /* parity even        */
-      CondNP     = 11, /* not parity even    */
-      CondL      = 12, /* jump less          */
-      CondNL     = 13, /* not less           */
-      CondLE     = 14, /* less or equal      */
-      CondNLE    = 15, /* not less or equal  */
-      CondAlways = 16  /* Jump always        */
-   } 
-   Condcode;
-
-
-/* Descriptions of additional properties of *unconditional* jumps. */
-typedef
-   enum {
-     JmpBoring=0,   /* boring unconditional jump */
-     JmpCall=1,     /* jump due to an x86 call insn */
-     JmpRet=2,      /* jump due to an x86 ret insn */
-     JmpSyscall=3,  /* do a system call, then jump */
-     JmpClientReq=4 /* do a client request, then jump */
-   }
-   JmpKind;
-
-
-/* Flags.  User-level code can only read/write O(verflow), S(ign),
-   Z(ero), A(ux-carry), C(arry), P(arity), and may also write
-   D(irection).  That's a total of 7 flags.  A FlagSet is a bitset,
-   thusly: 
-      76543210
-       DOSZACP
-   and bit 7 must always be zero since it is unused.
-*/
-typedef UChar FlagSet;
-
-#define FlagD (1<<6)
-#define FlagO (1<<5)
-#define FlagS (1<<4)
-#define FlagZ (1<<3)
-#define FlagA (1<<2)
-#define FlagC (1<<1)
-#define FlagP (1<<0)
-
-#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsOSZAP  (FlagO | FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsOSZCP  (FlagO | FlagS | FlagZ |         FlagC | FlagP)
-#define FlagsOSACP  (FlagO | FlagS |         FlagA | FlagC | FlagP)
-#define FlagsSZACP  (        FlagS | FlagZ | FlagA | FlagC | FlagP)
-#define FlagsSZAP   (        FlagS | FlagZ | FlagA |         FlagP)
-#define FlagsZCP    (                FlagZ         | FlagC | FlagP)
-#define FlagsOC     (FlagO |                         FlagC        )
-#define FlagsAC     (                        FlagA | FlagC        )
-
-#define FlagsALL    (FlagsOSZACP | FlagD)
-#define FlagsEmpty  (FlagSet)0
-
 #define VG_IS_FLAG_SUBSET(set1,set2) \
    (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) )
 
 #define VG_UNION_FLAG_SETS(set1,set2) \
    ( ((FlagSet)set1) | ((FlagSet)set2) )
 
-
-
-/* A Micro (u)-instruction. */
-typedef
-   struct {
-      /* word 1 */
-      UInt    lit32;      /* 32-bit literal */
-
-      /* word 2 */
-      UShort  val1;       /* first operand */
-      UShort  val2;       /* second operand */
-
-      /* word 3 */
-      UShort  val3;       /* third operand */
-      UChar   opcode;     /* opcode */
-      UChar   size;       /* data transfer size */
-
-      /* word 4 */
-      FlagSet flags_r;    /* :: FlagSet */
-      FlagSet flags_w;    /* :: FlagSet */
-      UChar   tag1:4;     /* first  operand tag */
-      UChar   tag2:4;     /* second operand tag */
-      UChar   tag3:4;     /* third  operand tag */
-      UChar   extra4b:4;  /* Spare field, used by WIDEN for src
-                             -size, and by LEA2 for scale 
-                             (1,2,4 or 8), and by unconditional JMPs for
-                             orig x86 instr size if --cachesim=yes */
-
-
-      /* word 5 */
-      UChar   cond;            /* condition, for jumps */
-      Bool    smc_check:1;     /* do a smc test, if writes memory. */
-      Bool    signed_widen:1;  /* signed or unsigned WIDEN ? */
-      JmpKind jmpkind:3;       /* additional properties of unconditional JMP */
-   }
-   UInstr;
-
-
-/* Expandable arrays of uinstrs. */
-typedef 
-   struct { 
-      Int     used; 
-      Int     size; 
-      UInstr* instrs;
-      Int     nextTemp;
-   }
-   UCodeBlock;
-
-/* Refer to `the last instruction stuffed in', including as an
-   lvalue. */
-#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1]
-
-/* An invalid temporary number :-) */
-#define INVALID_TEMPREG 999999999
-
-
 /* ---------------------------------------------------------------------
    Exports of vg_demangle.c
    ------------------------------------------------------------------ */
 
 extern void VG_(demangle) ( Char* orig, Char* result, Int result_size );
 
-
 /* ---------------------------------------------------------------------
    Exports of vg_from_ucode.c
    ------------------------------------------------------------------ */
 
 extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes );
 
+extern void   VG_(print_ccall_stats)      ( void );
+extern void   VG_(print_UInstr_histogram) ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_to_ucode.c
    ------------------------------------------------------------------ */
 
 extern Int   VG_(disBB)          ( UCodeBlock* cb, Addr eip0 );
-extern Char* VG_(nameOfIntReg)   ( Int size, Int reg );
-extern Char  VG_(nameOfIntSize)  ( Int size );
-extern UInt  VG_(extend_s_8to32) ( UInt x );
-extern Int   VG_(getNewTemp)     ( UCodeBlock* cb );
-extern Int   VG_(getNewShadow)   ( UCodeBlock* cb );
-
-#define SHADOW(tempreg)  ((tempreg)+1)
-
 
 /* ---------------------------------------------------------------------
    Exports of vg_translate.c
@@ -1275,41 +795,11 @@
                                Addr* trans_addr,
                                UInt* trans_size );
 
-extern void  VG_(emptyUInstr) ( UInstr* u );
-extern void  VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz );
-extern void  VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1 );
-extern void  VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2 );
-extern void  VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
-                               Tag tag1, UInt val1,
-                               Tag tag2, UInt val2,
-                               Tag tag3, UInt val3 );
-extern void VG_(setFlagRW) ( UInstr* u, 
-                             FlagSet fr, FlagSet fw );
-
-extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
-extern Bool VG_(anyFlagUse) ( UInstr* u );
-
-
-
-extern void  VG_(ppUInstr)        ( Int instrNo, UInstr* u );
-extern void  VG_(ppUCodeBlock)    ( UCodeBlock* cb, Char* title );
-
-extern UCodeBlock* VG_(allocCodeBlock) ( void );
-extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
-extern void  VG_(copyUInstr)                ( UCodeBlock* cb, UInstr* instr );
-
-extern Char* VG_(nameCondcode)    ( Condcode cond );
-extern Bool  VG_(saneUInstr)      ( Bool beforeRA, UInstr* u );
-extern Bool  VG_(saneUCodeBlock)  ( UCodeBlock* cb );
-extern Char* VG_(nameUOpcode)     ( Bool upper, Opcode opc );
-extern Int   VG_(rankToRealRegNo) ( Int rank );
-
-extern void* VG_(jitmalloc) ( Int nbytes );
-extern void  VG_(jitfree)   ( void* ptr );
-
+extern Char* VG_(nameCondcode)        ( Condcode cond );
+extern Bool  VG_(saneUInstr)          ( Bool beforeRA, Bool beforeLiveness,
+                                        UInstr* u );
+extern void  VG_(saneUCodeBlock)      ( UCodeBlock* cb );
+extern Bool  VG_(saneUCodeBlockCalls) ( UCodeBlock* cb );
 
 /* ---------------------------------------------------------------------
    Exports of vg_execontext.c.
@@ -1320,15 +810,13 @@
    comparing against suppression specifications.  The rest are purely
    informational (but often important). */
 
-typedef
-   struct _ExeContextRec {
-      struct _ExeContextRec * next;
-      /* The size of this array is VG_(clo_backtrace_size); at least
-         2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
-         [1] is its caller, [2] is the caller of [1], etc. */
-      Addr eips[0];
-   }
-   ExeContext;
+struct _ExeContext {
+   struct _ExeContext * next;
+   /* Variable-length array.  The size is VG_(clo_backtrace_size); at
+      least 2, at most VG_DEEPEST_BACKTRACE.  [0] is the current %eip,
+      [1] is its caller, [2] is the caller of [1], etc. */
+   Addr eips[0];
+};
 
 
 /* Initialise the ExeContext storage mechanism. */
@@ -1337,91 +825,86 @@
 /* Print stats (informational only). */
 extern void VG_(show_ExeContext_stats) ( void );
 
-
-/* Take a snapshot of the client's stack.  Search our collection of
-   ExeContexts to see if we already have it, and if not, allocate a
-   new one.  Either way, return a pointer to the context. */
-extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame,
-                                         Addr eip, Addr ebp );
-
-/* Print an ExeContext. */
-extern void VG_(pp_ExeContext) ( ExeContext* );
-
-/* Compare two ExeContexts, just comparing the top two callers. */
-extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, just comparing the top four callers. */
-extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 );
-
-/* Compare two ExeContexts, comparing all callers. */
-extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 );
-
+/* Like VG_(get_ExeContext), but with a slightly different type */
+extern ExeContext* VG_(get_ExeContext2) ( Addr eip, Addr ebp,
+                                          Addr ebp_min, Addr ebp_max );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_errcontext.c.
    ------------------------------------------------------------------ */
 
-extern void VG_(load_suppressions)    ( void );
-extern void VG_(show_all_errors)      ( void );
-extern void VG_(record_value_error)   ( Int size );
-extern void VG_(record_free_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_freemismatch_error)    ( ThreadState* tst, Addr a );
-extern void VG_(record_address_error) ( Addr a, Int size, 
-                                        Bool isWrite );
-
-extern void VG_(record_jump_error) ( ThreadState* tst, Addr a );
-
-extern void VG_(record_param_err) ( ThreadState* tst,
-                                    Addr a, 
-                                    Bool isWriteLack, 
-                                    Char* msg );
-extern void VG_(record_user_err) ( ThreadState* tst,
-                                   Addr a, Bool isWriteLack );
-extern void VG_(record_pthread_err) ( ThreadId tid, Char* msg );
-
-
-
-/* The classification of a faulting address. */
-typedef 
-   enum { Undescribed, /* as-yet unclassified */
-          Stack, 
-          Unknown, /* classification yielded nothing useful */
-          Freed, Mallocd, 
-          UserG, UserS }
-   AddrKind;
-
-/* Records info about a faulting address. */
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
 typedef
-   struct {
-      /* ALL */
-      AddrKind akind;
-      /* Freed, Mallocd */
-      Int blksize;
-      /* Freed, Mallocd */
-      Int rwoffset;
-      /* Freed, Mallocd */
-      ExeContext* lastchange;
-      /* Stack */
-      ThreadId stack_tid;
-      /* True if is just-below %esp -- could be a gcc bug. */
-      Bool maybe_gcc;
+   enum {
+      PThreadSupp = -1,    /* Matches PThreadErr */
    }
-   AddrInfo;
+   CoreSuppKind;
+
+/* For each caller specified for a suppression, record the nature of
+   the caller name.  Not of interest to skins. */
+typedef
+   enum { 
+      ObjName,    /* Name is of an shared object file. */
+      FunName     /* Name is of a function. */
+   }
+   SuppLocTy;
+
+/* Suppressions.  Skin part `SkinSupp' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreSupp {
+      struct _CoreSupp* next;
+      /* The number of times this error has been suppressed. */
+      Int count;
+      /* The name by which the suppression is referred to. */
+      Char* sname;
+      /* First two (name of fn where err occurs, and immediate caller)
+       * are mandatory;  extra two are optional. */
+      SuppLocTy caller_ty[VG_N_SUPP_CALLERS];
+      Char*     caller   [VG_N_SUPP_CALLERS];
+      /* The skin-specific part */
+      SkinSupp  skin_supp;
+   } 
+   CoreSupp;
+
+/* Note: it is imperative this doesn't overlap with (0..) at all, as skins
+ * effectively extend it by defining their own enums in the (0..) range. */
+typedef
+   enum { 
+      PThreadErr      = -1,   /* Pthreading error */
+   }
+   CoreErrorKind;
+
+/* Errors.  Skin part `SkinError' (which is all skins have to deal
+   with) is in vg_skin.h */
+typedef
+   struct _CoreErrContext {
+      struct _CoreErrContext* next;
+      /* NULL if unsuppressed; or ptr to suppression record. */
+      CoreSupp* supp;
+      Int count;
+      ExeContext* where;
+      ThreadId tid;
+      /* These record %EIP, %ESP and %EBP at the error point.  They
+         are only used to make GDB-attaching convenient; there is no
+         other purpose; specifically they are not used to do
+         comparisons between errors. */
+      UInt m_eip;
+      UInt m_esp;
+      UInt m_ebp;
+      /* The skin-specific part */
+      SkinError skin_err;
+   } 
+   CoreError;
 
 
-/* ---------------------------------------------------------------------
-   Exports of vg_clientperms.c
-   ------------------------------------------------------------------ */
+extern void VG_(load_suppressions)    ( void );
 
-extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );
+extern void VG_(record_pthread_error) ( ThreadId tid, Char* msg );
 
-extern UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block );
-
-extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void );
-
-extern void VG_(show_client_block_stats) ( void );
-
+extern void VG_(show_all_errors)      ( void );
 
 /* ---------------------------------------------------------------------
    Exports of vg_procselfmaps.c
@@ -1438,52 +921,26 @@
    ------------------------------------------------------------------ */
 
 /* We assume the executable is loaded here ... can't really find
-   out.  There is a hacky sanity check in vg_init_memory_audit()
+   out.  There is a hacky sanity check in VG_(init_memory)()
    which should trip up most stupidities.
 */
 #define VG_ASSUMED_EXE_BASE  (Addr)0x8048000
 
-extern void VG_(read_symbols) ( void );
-extern void VG_(mini_stack_dump) ( ExeContext* ec );
-extern void VG_(what_obj_and_fun_is_this)
-                                     ( Addr a,
-                                       Char* obj_buf, Int n_obj_buf,
-                                       Char* fun_buf, Int n_fun_buf );
-extern Bool VG_(what_line_is_this) ( Addr a,
-                                     UChar* filename, Int n_filename,
-                                     UInt* lineno );
-extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a,
-                                     Char* fn_name, Int n_fn_name);
+extern void VG_(maybe_read_symbols)   ( void );
+extern void VG_(read_symtab_callback) ( Addr start, UInt size, 
+                                        Char rr, Char ww, Char xx,
+                                        UInt foffset, UChar* filename );
+extern void VG_(maybe_unload_symbols) ( Addr start, UInt length );
 
-extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length );
+extern Bool VG_(get_fnname_nodemangle)( Addr a, Char* fnname, Int n_fnname );
+extern void VG_(mini_stack_dump)      ( ExeContext* ec );
 
 
 /* ---------------------------------------------------------------------
    Exports of vg_clientmalloc.c
    ------------------------------------------------------------------ */
 
-typedef
-   enum { 
-      Vg_AllocMalloc = 0,
-      Vg_AllocNew    = 1,
-      Vg_AllocNewVec = 2 
-   }
-   VgAllocKind;
-
-/* Description of a malloc'd chunk. */
-typedef 
-   struct _ShadowChunk {
-      struct _ShadowChunk* next;
-      ExeContext*   where;          /* where malloc'd/free'd */
-      UInt          size : 30;      /* size requested.       */
-      VgAllocKind   allockind : 2;  /* which wrapper did the allocation */
-      Addr          data;           /* ptr to actual block.  */
-   } 
-   ShadowChunk;
-
-extern void          VG_(clientmalloc_done) ( void );
-extern void          VG_(describe_addr) ( Addr a, AddrInfo* ai );
-extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows );
+extern void  VG_(client_malloc_init)();
 
 /* These are called from the scheduler, when it intercepts a user
    request. */
@@ -1503,11 +960,14 @@
    Exports of vg_main.c
    ------------------------------------------------------------------ */
 
+/* Sanity checks which may be done at any time.  The scheduler decides when. */
+extern void VG_(do_sanity_checks) ( Bool force_expensive );
+
 /* A structure used as an intermediary when passing the simulated
    CPU's state to some assembly fragments, particularly system calls.
    Stuff is copied from baseBlock to here, the assembly magic runs,
-   and then the inverse copy is done. */
-
+   and then the inverse copy is done. 
+ */
 extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ 
                                  + 1 /* %eflags */ 
                                  + 1 /* %eip */
@@ -1520,30 +980,27 @@
 
 /* Called when some unhandleable client behaviour is detected.
    Prints a msg and aborts. */
-extern void VG_(unimplemented) ( Char* msg );
+extern void VG_(unimplemented) ( Char* msg )
+            __attribute__((__noreturn__));
 extern void VG_(nvidia_moan) ( void );
 
 /* The stack on which Valgrind runs.  We can't use the same stack as the
    simulatee -- that's an important design decision.  */
 extern UInt VG_(stack)[10000];
 
-/* Similarly, we have to ask for signals to be delivered on an
-   alternative stack, since it is possible, although unlikely, that
-   we'll have to run client code from inside the Valgrind-installed
-   signal handler.  If this happens it will be done by
-   vg_deliver_signal_immediately(). */
+/* Similarly, we have to ask for signals to be delivered on an alternative
+   stack, since it is possible, although unlikely, that we'll have to run
+   client code from inside the Valgrind-installed signal handler.  If this
+   happens it will be done by vg_deliver_signal_immediately(). */
 extern UInt VG_(sigstack)[10000];
 
 /* Holds client's %esp at the point we gained control.  From this the
    client's argc, argv and envp are deduced. */
 extern Addr   VG_(esp_at_startup);
-extern Int    VG_(client_argc);
-extern Char** VG_(client_argv);
-extern Char** VG_(client_envp);
 
-/* Remove valgrind.so from a LD_PRELOAD=... string so child processes
-   don't get traced into.  Also mess up $libdir/valgrind so that our
-   libpthread.so disappears from view. */
+/* Remove valgrind.so and skin's .so from a LD_PRELOAD=... string so child
+   processes don't get traced into.  Also mess up $libdir/valgrind so that
+   our libpthread.so disappears from view. */
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str );
 
@@ -1553,9 +1010,6 @@
    the client program really was running on the real cpu. */
 extern void VG_(start_GDB_whilst_on_client_stack) ( void );
 
-/* Spew out vast amounts of junk during JITting? */
-extern Bool  VG_(disassemble);
-
 /* 64-bit counter for the number of basic blocks done. */
 extern ULong VG_(bbs_done);
 /* 64-bit counter for the number of bbs to go before a debug exit. */
@@ -1573,6 +1027,11 @@
 /* This is the ThreadId of the last thread the scheduler ran. */
 extern ThreadId VG_(last_run_tid);
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+extern UInt VG_(exitcode);
+
 
 /* --- Counters, for informational purposes only. --- */
 
@@ -1628,83 +1087,38 @@
    Exports of vg_memory.c
    ------------------------------------------------------------------ */
 
-extern void VGM_(init_memory_audit) ( void );
-extern Addr VGM_(curr_dataseg_end);
-extern void VG_(show_reg_tags) ( void );
-extern void VG_(detect_memory_leaks) ( void );
-extern void VG_(done_prof_mem) ( void );
+extern void VG_(init_memory)            ( void );
+extern void VG_(new_exe_segment)        ( Addr a, UInt len );
+extern void VG_(remove_if_exe_segment)  ( Addr a, UInt len );
 
-/* Set permissions for an address range.  Not speed-critical. */
-extern void VGM_(make_noaccess) ( Addr a, UInt len );
-extern void VGM_(make_writable) ( Addr a, UInt len );
-extern void VGM_(make_readable) ( Addr a, UInt len );
-/* Use with care! (read: use for shmat only) */
-extern void VGM_(make_readwritable) ( Addr a, UInt len );
-extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst,
-                                             UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-/* Sanity checks which may be done at any time.  The scheduler decides
-   when. */
-extern void VG_(do_sanity_checks) ( Bool force_expensive );
-/* Very cheap ... */
-extern Bool VG_(first_and_last_secondaries_look_plausible) ( void );
-
-/* These functions are called from generated code. */
-extern void VG_(helperc_STOREV4) ( UInt, Addr );
-extern void VG_(helperc_STOREV2) ( UInt, Addr );
-extern void VG_(helperc_STOREV1) ( UInt, Addr );
-
-extern UInt VG_(helperc_LOADV1) ( Addr );
-extern UInt VG_(helperc_LOADV2) ( Addr );
-extern UInt VG_(helperc_LOADV4) ( Addr );
-
-extern void VGM_(handle_esp_assignment) ( Addr new_espA );
-extern void VGM_(fpu_write_check) ( Addr addr, Int size );
-extern void VGM_(fpu_read_check)  ( Addr addr, Int size );
-
-/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
-   space and pass the addresses and values of all addressible,
-   defined, aligned words to notify_word.  This is the basis for the
-   leak detector.  Returns the number of calls made to notify_word.  */
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) );
-
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-extern Bool VG_(is_just_below_ESP)( Addr esp, Addr aa );
+/* Called from generated code. */
+extern void VG_(handle_esp_assignment) ( Addr new_espA );
 
 /* Nasty kludgery to deal with applications which switch stacks,
    like netscape. */
 #define VG_PLAUSIBLE_STACK_SIZE 8000000
 
-/* Needed by the pthreads implementation. */
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-
-
 /* ---------------------------------------------------------------------
-   Exports of vg_syscall_mem.c
+   Exports of vg_syscalls.c
    ------------------------------------------------------------------ */
 
+extern void VG_(init_dataseg_end_for_brk) ( void );
+
 extern void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid );
 
-extern void VG_(check_known_blocking_syscall) ( ThreadId tid, 
-                                                Int syscallno,
-                                                Int* /*IN*/ res );
+extern void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno );
+extern void  VG_(post_known_blocking_syscall)( ThreadId tid, Int syscallno,
+                                               void* pre_res, Int res );
 
 extern Bool VG_(is_kerror) ( Int res );
 
-#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)        \
-         VG_(load_thread_state)(thread_id);                \
-         VG_(copy_baseBlock_to_m_state_static)();          \
-         VG_(do_syscall)();                                \
-         VG_(copy_m_state_static_to_baseBlock)();          \
-         VG_(save_thread_state)(thread_id);                \
-         VG_(threads)[thread_id].sh_eax = VGM_WORD_VALID;  \
+#define KERNEL_DO_SYSCALL(thread_id, result_lvalue)               \
+         VG_(load_thread_state)(thread_id);                       \
+         VG_(copy_baseBlock_to_m_state_static)();                 \
+         VG_(do_syscall)();                                       \
+         VG_(copy_m_state_static_to_baseBlock)();                 \
+         VG_(save_thread_state)(thread_id);                       \
+         VG_(threads)[thread_id].sh_eax = VG_(written_shadow_reg);\
          result_lvalue = VG_(threads)[thread_id].m_eax;
 
 
@@ -1726,6 +1140,9 @@
 /* The number of basic blocks in an epoch (one age-step). */
 #define VG_BBS_PER_EPOCH 20000
 
+/* The fast-cache for tt-lookup. */
+extern Addr VG_(tt_fast)[VG_TT_FAST_SIZE];
+
 extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used );
 extern void VG_(maybe_do_lru_pass) ( void );
 extern void VG_(flush_transtab) ( void );
@@ -1742,40 +1159,6 @@
 
 
 /* ---------------------------------------------------------------------
-   Exports of vg_vtagops.c
-   ------------------------------------------------------------------ */
-
-/* Lists the names of value-tag operations used in instrumented
-   code.  These are the third argument to TAG1 and TAG2 uinsns. */
-
-typedef
-   enum { 
-     /* Unary. */
-     VgT_PCast40, VgT_PCast20, VgT_PCast10,
-     VgT_PCast01, VgT_PCast02, VgT_PCast04,
-
-     VgT_PCast14, VgT_PCast12, VgT_PCast11,
-
-     VgT_Left4, VgT_Left2, VgT_Left1,
-
-     VgT_SWiden14, VgT_SWiden24, VgT_SWiden12,
-     VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12,
-
-     /* Binary; 1st is rd; 2nd is rd+wr */
-     VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0,
-     VgT_DifD4, VgT_DifD2, VgT_DifD1,
-
-     VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, 
-     VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ,
-     VgT_DebugFn
-   }
-   VgTagOp;
-
-extern Char* VG_(nameOfTagOp) ( VgTagOp );
-extern UInt VG_(DebugFn) ( UInt a1, UInt a2 );
-
-
-/* ---------------------------------------------------------------------
    Exports of vg_syscall.S
    ------------------------------------------------------------------ */
 
@@ -1844,60 +1227,24 @@
 extern void VG_(helper_DAS);
 extern void VG_(helper_DAA);
 
-extern void VG_(helper_value_check4_fail);
-extern void VG_(helper_value_check2_fail);
-extern void VG_(helper_value_check1_fail);
-extern void VG_(helper_value_check0_fail);
-
 /* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
 extern void VG_(signalreturn_bogusRA)( void );
 
-
 /* ---------------------------------------------------------------------
-   Exports of vg_cachesim.c
+   Things relating to the used skin
    ------------------------------------------------------------------ */
 
-extern Int VG_(log2) ( Int x );
-
-extern UCodeBlock* VG_(cachesim_instrument) ( UCodeBlock* cb_in, 
-                                              Addr orig_addr );
-
-typedef struct  _iCC  iCC;
-typedef struct _idCC idCC;
-
-extern void VG_(init_cachesim)      ( void );
-extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv );
-
-extern void VG_(cachesim_log_non_mem_instr)(  iCC* cc );
-extern void VG_(cachesim_log_mem_instr)    ( idCC* cc, Addr data_addr );
-
-extern void VG_(cachesim_notify_discard) ( TTEntry* tte );
+#define VG_TRACK(fn, args...)          \
+   do {                                \
+      if (VG_(track_events).fn)        \
+         VG_(track_events).fn(args);   \
+   } while (0)
 
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
    ------------------------------------------------------------------ */
 
-/* This is the Intel register encoding. */
-#define R_EAX 0
-#define R_ECX 1
-#define R_EDX 2
-#define R_EBX 3
-#define R_ESP 4
-#define R_EBP 5
-#define R_ESI 6
-#define R_EDI 7
-
-#define R_AL (0+R_EAX)
-#define R_CL (0+R_ECX)
-#define R_DL (0+R_EDX)
-#define R_BL (0+R_EBX)
-#define R_AH (4+R_EAX)
-#define R_CH (4+R_ECX)
-#define R_DH (4+R_EDX)
-#define R_BH (4+R_EBX)
-
-
 /* ---------------------------------------------------------------------
    Offsets into baseBlock for everything which needs to referred to
    from generated code.  The order of these decls does not imply 
@@ -1948,7 +1295,6 @@
 extern Int VGOFF_(sh_edi);
 extern Int VGOFF_(sh_eflags);
 
-
 /* -----------------------------------------------------
    Read-only parts of baseBlock.
    -------------------------------------------------- */
@@ -1993,25 +1339,22 @@
 extern Int VGOFF_(helper_DAS);
 extern Int VGOFF_(helper_DAA);
 
-extern Int VGOFF_(helper_value_check4_fail);
-extern Int VGOFF_(helper_value_check2_fail);
-extern Int VGOFF_(helper_value_check1_fail);
-extern Int VGOFF_(helper_value_check0_fail);
-
-extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */
-extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */
-
-extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */
-extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */
-
 extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */
-extern Int VGOFF_(fpu_write_check);       /* :: Addr -> Int -> void */
-extern Int VGOFF_(fpu_read_check);        /* :: Addr -> Int -> void */
 
-extern Int VGOFF_(cachesim_log_non_mem_instr);
-extern Int VGOFF_(cachesim_log_mem_instr);
+/* For storing extension-specific helpers, determined at runtime.  The addr 
+ * and offset arrays together form a (addr, offset) map that allows a 
+ * helper's baseBlock offset to be computed from its address.  It's done 
+ * like this so CCALL_M_Ns and other helper calls can use the function 
+ * address rather than having to muck around with offsets. */
+extern UInt VG_(n_compact_helpers);
+extern UInt VG_(n_noncompact_helpers);
+
+extern Addr VG_(compact_helper_addrs)  [];
+extern Int  VG_(compact_helper_offsets)[];
+
+extern Addr VG_(noncompact_helper_addrs)  [];
+extern Int  VG_(noncompact_helper_offsets)[];
+
 
 #endif /* ndef __VG_INCLUDE_H */
 
diff --git a/vg_instrument.c b/vg_instrument.c
new file mode 100644
index 0000000..9a062ee
--- /dev/null
+++ b/vg_instrument.c
@@ -0,0 +1,96 @@
+/*--------------------------------------------------------------------*/
+/*--- Higher-level UCode sequence builders                         ---*/
+/*---                                              vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+// SSS: should this file eventually not be in core, but be included in
+// skins that use it??  Reduces size of core, but increases size of every
+// skin that uses it...
+
+/* We only import vg_skin.h here, because this file only provides functions
+   for doing things that could be done directly by the skin -- it's just to
+   make skins' lives easier, rather than let them do something they
+   couldn't otherwise do. */
+#include "vg_skin.h"
+
+#define uInstr0   VG_(newUInstr0)
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+
+
+void VG_(callHelper_0_0)(UCodeBlock* cb, Addr f)
+{
+   uInstr0(cb, CCALL, 0);
+   uCCall(cb, f, 0, 0, 0);
+}
+
+void VG_(callHelper_1_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+
+   vg_assert(regparms_n <= 1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr1(cb, CCALL, 0, TempReg, t1);
+   uCCall(cb, f, 1, regparms_n, 0);
+}
+
+void VG_(callHelper_2_0)(UCodeBlock* cb, Addr f, UInt arg1, UInt arg2,
+                         UInt regparms_n)
+{
+   UInt t1 = newTemp(cb);
+   UInt t2 = newTemp(cb);
+
+   vg_assert(regparms_n <= 2);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t1);
+   uLiteral(cb, arg1);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t2);
+   uLiteral(cb, arg2);
+   uInstr2(cb, CCALL, 0, TempReg, t1, TempReg, t2);
+   uCCall(cb, f, 2, regparms_n, 0);
+}
+
+void VG_(set_global_var)(UCodeBlock* cb, Addr globvar_ptr, UInt val)
+{
+   Int t_gv  = newTemp(cb);        
+   Int t_val = newTemp(cb);        
+
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_val);
+   uLiteral(cb, val);
+   uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_gv);
+   uLiteral(cb, globvar_ptr);
+   uInstr2(cb, STORE, 4, TempReg, t_val, TempReg, t_gv);
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          vg_instrument.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/vg_kerneliface.h b/vg_kerneliface.h
index bcc10f5..ede3049 100644
--- a/vg_kerneliface.h
+++ b/vg_kerneliface.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #ifndef __VG_KERNELIFACE_H
@@ -139,6 +139,40 @@
 #define VKI_MAP_PRIVATE    0x02            /* Changes are private.  */
 #define VKI_MAP_FIXED      0x10            /* Interpret addr exactly */
 
+/* Copied from linux-2.4.19/include/asm-i386/fcntl.h */
+
+#define VKI_O_RDONLY             00
+#define VKI_O_WRONLY             01
+#define VKI_O_RDWR               02
+#define VKI_O_CREAT            0100 /* not fcntl */
+#define VKI_O_EXCL             0200 /* not fcntl */
+#define VKI_O_TRUNC           01000 /* not fcntl */
+#define VKI_O_APPEND          02000
+#define VKI_O_NONBLOCK        04000
+#define VKI_O_SYNC           010000
+#define VKI_FASYNC           020000 /* fcntl, for BSD compatibility */
+#define VKI_O_DIRECT         040000 /* direct disk access hint */
+#define VKI_O_LARGEFILE     0100000
+#define VKI_O_DIRECTORY     0200000 /* must be a directory */
+#define VKI_O_NOFOLLOW      0400000 /* don't follow links */
+
+/* Copied from linux-2.4.19/include/linux/stat.h */
+
+#define VKI_S_IRWXU 00700
+#define VKI_S_IRUSR 00400
+#define VKI_S_IWUSR 00200
+#define VKI_S_IXUSR 00100
+
+#define VKI_S_IRWXG 00070
+#define VKI_S_IRGRP 00040
+#define VKI_S_IWGRP 00020
+#define VKI_S_IXGRP 00010
+
+#define VKI_S_IRWXO 00007
+#define VKI_S_IROTH 00004
+#define VKI_S_IWOTH 00002
+#define VKI_S_IXOTH 00001
+
 
 /* Copied from /usr/src/linux-2.4.9-13/include/asm/errno.h */
 
diff --git a/vg_lackey.c b/vg_lackey.c
new file mode 100644
index 0000000..4592cc6
--- /dev/null
+++ b/vg_lackey.c
@@ -0,0 +1,224 @@
+/*--------------------------------------------------------------------*/
+/*--- Simple skin for counting UInstrs, using a C helper.          ---*/
+/*---                                                  vg_lackey.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+//#define uInstr0   VG_(newUInstr0)
+//#define uLiteral  VG_(setLiteralField)
+
+/* Nb: use ULongs because the numbers can get very big */
+static ULong n_dlrr_calls   = 0;
+static ULong n_BBs          = 0;
+static ULong n_UInstrs      = 0;
+static ULong n_x86_instrs   = 0;
+static ULong n_Jccs         = 0;
+static ULong n_Jccs_untaken = 0;
+
+static void add_one_dlrr_call(void)
+{
+   n_dlrr_calls++;
+}
+
+/* See comment above SK_(instrument) for reason why n_x86_instrs is
+   incremented here. */
+static void add_one_BB(void)
+{
+   n_BBs++;
+   n_x86_instrs++;
+}
+
+static void add_one_UInstr(void)
+{
+   n_UInstrs++;
+}
+
+static void add_one_x86_instr(void)
+{
+   n_x86_instrs++;
+}
+
+static void add_one_Jcc(void)
+{
+   n_Jccs++;
+}
+
+static void add_one_Jcc_untaken(void)
+{
+   n_Jccs_untaken++;
+}
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* not_used)
+{
+   needs->name        = "lackey";
+   needs->description = "a UInstr counter";
+
+   //VG_(register_compact_helper)((Addr) & add_one_dlrr_call);
+   VG_(register_compact_helper)((Addr) & add_one_BB);
+   VG_(register_compact_helper)((Addr) & add_one_x86_instr);
+   VG_(register_compact_helper)((Addr) & add_one_UInstr);
+   VG_(register_compact_helper)((Addr) & add_one_Jcc);
+   VG_(register_compact_helper)((Addr) & add_one_Jcc_untaken);
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+/* Note: x86 instructions are marked by an INCEIP at the end of each one,
+   except for the final one in the basic block which ends in an
+   unconditional JMP.  Sometimes the final unconditional JMP is preceded by
+   a conditional JMP (Jcc), and thus it isn't reached.  Eg:
+
+      <code a>
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      JMP ...     (will not be reached if Jcc succeeds)
+
+   If we simplemindedly added calls to add_one_x86_instr() before INCEIPs
+   and unconditional JMPs, we'd sometimes miss the final call (when a
+   preceding conditional JMP succeeds), underestimating the x86 instruction
+   count.
+
+      <code a>
+      call add_one_x86_instr()
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      call add_one_x86_instr()
+      JMP ...
+
+   Instead we add a call before each INCEIP, and also one at the start of the
+   block, but not one at the end, viz:
+
+      call add_one_x86_instr()
+
+      <code a>
+      call add_one_x86_instr()
+      INCEIP ...
+
+      <code b>
+      Jcc ...
+      JMP ...
+
+   Which gives us the right answer.  And just to avoid two C calls, we fold
+   the basic-block-beginning call in with add_one_BB().  Phew.
+*/ 
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
+   UCodeBlock* cb;
+   Int         i;
+   UInstr*     u;
+   Char        fnname[100];
+
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   /* Count call to dlrr(), if this BB is dlrr()'s entry point */
+   if (VG_(get_fnname_if_entry)(orig_addr, fnname, 100) &&
+       0 == VG_(strcmp)(fnname, "_dl_runtime_resolve")) 
+   {
+      VG_(callHelper_0_0)(cb, (Addr) & add_one_dlrr_call);
+   }
+
+   /* Count basic block */
+   VG_(callHelper_0_0)(cb, (Addr) & add_one_BB);
+
+   for (i = 0; i < cb_in->used; i++) {
+      u = &cb_in->instrs[i];
+
+      switch (u->opcode) {
+         case NOP: case CALLM_S: case CALLM_E:
+            break;
+   
+         case INCEIP:
+            /* Count x86 instr */
+            VG_(callHelper_0_0)(cb, (Addr) & add_one_x86_instr);
+            VG_(copyUInstr)(cb, u);
+            break;
+
+         case JMP:
+            if (u->cond != CondAlways) {
+               /* Count Jcc */
+               VG_(callHelper_0_0)(cb, (Addr) & add_one_Jcc);
+               VG_(copyUInstr)(cb, u);
+               /* Count non-taken Jcc */
+               VG_(callHelper_0_0)(cb, (Addr) & add_one_Jcc_untaken);
+            } else {
+               VG_(copyUInstr)(cb, u);
+            }
+            break;
+            
+         default:
+            /* Count UInstr */
+            VG_(callHelper_0_0)(cb, (Addr) & add_one_UInstr);
+            VG_(copyUInstr)(cb, u);
+            break;
+      }
+   }
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+void SK_(fini)(void)
+{
+    VG_(message)(Vg_UserMsg,
+                 "Counted %d calls to _dl_runtime_resolve()", n_dlrr_calls);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Executed:");
+    VG_(message)(Vg_UserMsg, "  BBs:         %u", n_BBs);
+    VG_(message)(Vg_UserMsg, "  x86 instrs:  %u", n_x86_instrs);
+    VG_(message)(Vg_UserMsg, "  UInstrs:     %u", n_UInstrs);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Jccs:");
+    VG_(message)(Vg_UserMsg, "  total:       %u", n_Jccs);
+    VG_(message)(Vg_UserMsg, "  %% taken:     %u%%",
+                             (n_Jccs - n_Jccs_untaken)*100 / n_Jccs);
+
+    VG_(message)(Vg_UserMsg, "");
+    VG_(message)(Vg_UserMsg, "Ratios:");
+    VG_(message)(Vg_UserMsg, "  x86 instrs : BB        = %3u : 10",
+                             10 * n_x86_instrs / n_BBs);
+    VG_(message)(Vg_UserMsg, "     UInstrs : BB        = %3u : 10",
+                             10 * n_UInstrs / n_BBs);
+    VG_(message)(Vg_UserMsg, "     UInstrs : x86_instr = %3u : 10",
+                             10 * n_UInstrs / n_x86_instrs);
+
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              vg_lackey.c ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/vg_libpthread.c b/vg_libpthread.c
index 994cdb7..5972dfa 100644
--- a/vg_libpthread.c
+++ b/vg_libpthread.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ALL THIS CODE RUNS ON THE SIMULATED CPU.
@@ -257,6 +257,12 @@
    return 0;
 }
 
+int pthread_attr_getdetachstate(const pthread_attr_t *attr, int *detachstate)
+{
+   *detachstate = attr->__detachstate;
+   return 0;
+}
+
 int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit)
 {
    static int moans = N_MOANS;
@@ -1044,6 +1050,7 @@
 void __my_pthread_testcancel(void)
 {
    int res;
+   ensure_valgrind("__my_pthread_testcancel");
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__TESTCANCEL,
                            0, 0, 0, 0);
@@ -1178,7 +1185,7 @@
       if (n_now != n_orig) break;
 
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
+      nanosleep_interval.tv_nsec = 12 * 1000 * 1000; /* 12 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -1381,13 +1388,14 @@
 /* Relies on assumption that initial private data is NULL.  This
    should be fixed somehow. */
 
-/* The allowable keys (indices) (all 2 of them). 
+/* The allowable keys (indices) (all 3 of them). 
    From sysdeps/pthread/bits/libc-tsd.h
 */
-#define N_LIBC_TSD_EXTRA_KEYS 1
+#define N_LIBC_TSD_EXTRA_KEYS 0
 
 enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
                         _LIBC_TSD_KEY_DL_ERROR,
+                        _LIBC_TSD_KEY_RPC_VARS,
                         _LIBC_TSD_KEY_N };
 
 /* Auto-initialising subsystem.  libc_specifics_inited is set 
@@ -1877,6 +1885,10 @@
 }
 
 
+pid_t __vfork(void)
+{
+   return __fork();
+}
 
 
 /* ---------------------------------------------------------------------
@@ -1965,7 +1977,7 @@
    Basic idea is: modify the timeout parameter to select so that it
    returns immediately.  Poll like this until select returns non-zero,
    indicating something interesting happened, or until our time is up.
-   Space out the polls with nanosleeps of say 20 milliseconds, which
+   Space out the polls with nanosleeps of say 11 milliseconds, which
    is required to be nonblocking; this allows other threads to run.  
 
    Assumes:
@@ -2083,7 +2095,7 @@
       /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */
+      nanosleep_interval.tv_nsec = 11 * 1000 * 1000; /* 11 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       res = my_do_syscall2(__NR_nanosleep, 
@@ -2193,7 +2205,7 @@
       /* fprintf(stderr, "MY_POLL: nanosleep\n"); */
       /* nanosleep and go round again */
       nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */
+      nanosleep_interval.tv_nsec = 13 * 1000 * 1000; /* 13 milliseconds */
       /* It's critical here that valgrind's nanosleep implementation
          is nonblocking. */
       (void)my_do_syscall2(__NR_nanosleep, 
@@ -2810,6 +2822,7 @@
 weak_alias (__pread64, pread64)
 weak_alias (__pwrite64, pwrite64)
 weak_alias(__fork, fork)
+weak_alias(__vfork, vfork)
 
 weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np)
 
diff --git a/vg_libpthread_unimp.c b/vg_libpthread_unimp.c
index f413887..f3938ec 100644
--- a/vg_libpthread_unimp.c
+++ b/vg_libpthread_unimp.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* ---------------------------------------------------------------------
@@ -82,7 +82,7 @@
 //void longjmp ( void )  { unimp("longjmp"); }
 //void pthread_atfork ( void )  { unimp("pthread_atfork"); }
 //void pthread_attr_destroy ( void )  { unimp("pthread_attr_destroy"); }
-void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
+//void pthread_attr_getdetachstate ( void )  { unimp("pthread_attr_getdetachstate"); }
 void pthread_attr_getinheritsched ( void )  { unimp("pthread_attr_getinheritsched"); }
 //void pthread_attr_getschedparam ( void )  { unimp("pthread_attr_getschedparam"); }
 //void pthread_attr_getschedpolicy ( void )  { unimp("pthread_attr_getschedpolicy"); }
diff --git a/vg_main.c b/vg_main.c
index 5cce13d..582b652 100644
--- a/vg_main.c
+++ b/vg_main.c
@@ -26,12 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-
 
 /* ---------------------------------------------------------------------
    Compute offsets into baseBlock.  See comments in vg_include.h.
@@ -62,6 +60,7 @@
 Int VGOFF_(sh_esi) = INVALID_OFFSET;
 Int VGOFF_(sh_edi) = INVALID_OFFSET;
 Int VGOFF_(sh_eflags) = INVALID_OFFSET;
+
 Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_div_64_32) = INVALID_OFFSET;
 Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET;
@@ -92,25 +91,25 @@
 Int VGOFF_(helper_SAHF) = INVALID_OFFSET;
 Int VGOFF_(helper_DAS) = INVALID_OFFSET;
 Int VGOFF_(helper_DAA) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET;
-Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET;
-Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET;
 Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET;
-Int VGOFF_(fpu_write_check) = INVALID_OFFSET;
-Int VGOFF_(fpu_read_check) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_non_mem_instr) = INVALID_OFFSET;
-Int VGOFF_(cachesim_log_mem_instr)     = INVALID_OFFSET;
+
+/* MAX_NONCOMPACT_HELPERS can be increased easily.  If MAX_COMPACT_HELPERS is
+ * increased too much, they won't really be compact any more... */
+#define  MAX_COMPACT_HELPERS     8
+#define  MAX_NONCOMPACT_HELPERS  8 
+
+UInt VG_(n_compact_helpers)    = 0;
+UInt VG_(n_noncompact_helpers) = 0;
+
+Addr VG_(compact_helper_addrs)  [MAX_COMPACT_HELPERS];
+Int  VG_(compact_helper_offsets)[MAX_COMPACT_HELPERS];
+Addr VG_(noncompact_helper_addrs)  [MAX_NONCOMPACT_HELPERS];
+Int  VG_(noncompact_helper_offsets)[MAX_NONCOMPACT_HELPERS];
 
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
 
+
 /* Words. */
 static Int baB_off = 0;
 
@@ -133,6 +132,41 @@
    return off;
 }
 
+/* Registers a function in compact_helper_addrs;  compact_helper_offsets is
+ * filled in later.
+ */
+void VG_(register_compact_helper)(Addr a)
+{
+   if (MAX_COMPACT_HELPERS <= VG_(n_compact_helpers)) {
+      VG_(printf)("Can only register %d compact helpers\n", 
+                  MAX_COMPACT_HELPERS);
+      VG_(panic)("Too many compact helpers registered");
+   }
+   VG_(compact_helper_addrs)[VG_(n_compact_helpers)] = a;
+   VG_(n_compact_helpers)++;
+}
+
+/* Registers a function in noncompact_helper_addrs;  noncompact_helper_offsets
+ * is filled in later.
+ */
+void VG_(register_noncompact_helper)(Addr a)
+{
+   if (MAX_NONCOMPACT_HELPERS <= VG_(n_noncompact_helpers)) {
+      VG_(printf)("Can only register %d non-compact helpers\n", 
+                  MAX_NONCOMPACT_HELPERS);
+      VG_(printf)("Try increasing MAX_NON_COMPACT_HELPERS\n");
+      VG_(panic)("Too many non-compact helpers registered");
+   }
+   VG_(noncompact_helper_addrs)[VG_(n_noncompact_helpers)] = a;
+   VG_(n_noncompact_helpers)++;
+}
+
+/* Allocate offsets in baseBlock for the skin helpers */
+static void assign_helpers_in_baseBlock(UInt n, Int offsets[], Addr addrs[])
+{
+   Int i;
+   for (i = 0; i < n; i++) offsets[i] = alloc_BaB_1_set( addrs[i] );
+}
 
 /* Here we assign actual offsets.  It's important to get the most
    popular referents within 128 bytes of the start, so we can take
@@ -143,8 +177,6 @@
 
 static void vg_init_baseBlock ( void )
 {
-   baB_off = 0;
-
    /* Those with offsets under 128 are carefully chosen. */
 
    /* WORD offsets in this column */
@@ -158,82 +190,42 @@
    /* 7   */ VGOFF_(m_edi)     = alloc_BaB(1);
    /* 8   */ VGOFF_(m_eflags)  = alloc_BaB(1);
 
-   /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
-   /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
-   /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
-   /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
-   /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
-   /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
-   /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
-   /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
-   /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   if (VG_(needs).shadow_regs) {
+      /* 9   */ VGOFF_(sh_eax)    = alloc_BaB(1);
+      /* 10  */ VGOFF_(sh_ecx)    = alloc_BaB(1);
+      /* 11  */ VGOFF_(sh_edx)    = alloc_BaB(1);
+      /* 12  */ VGOFF_(sh_ebx)    = alloc_BaB(1);
+      /* 13  */ VGOFF_(sh_esp)    = alloc_BaB(1);
+      /* 14  */ VGOFF_(sh_ebp)    = alloc_BaB(1);
+      /* 15  */ VGOFF_(sh_esi)    = alloc_BaB(1);
+      /* 16  */ VGOFF_(sh_edi)    = alloc_BaB(1);
+      /* 17  */ VGOFF_(sh_eflags) = alloc_BaB(1);
+   }
 
-   /* 17a */ 
-   VGOFF_(cachesim_log_non_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_non_mem_instr) );
-   /* 17b */ 
-   VGOFF_(cachesim_log_mem_instr)  
-      = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_mem_instr) );
+   /* 9,10,11 or 18,19,20... depends on whether shadow regs are used
+    * and on the number of compact helpers registered */ 
 
-   /* 18  */ 
-   VGOFF_(helper_value_check4_fail) 
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) );
-   /* 19 */
-   VGOFF_(helper_value_check0_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) );
+   /* (9 or 18) + n_compact_helpers  */
+   /* Register VG_(handle_esp_assignment) if needed. */
+   if (VG_(track_events).new_mem_stack_aligned || 
+       VG_(track_events).die_mem_stack_aligned) 
+      VG_(register_compact_helper)( (Addr) & VG_(handle_esp_assignment) );
 
-   /* 20  */
-   VGOFF_(helperc_STOREV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) );
-   /* 21  */
-   VGOFF_(helperc_STOREV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) );
+   /* Allocate slots for compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_compact_helpers), 
+                               VG_(compact_helper_offsets), 
+                               VG_(compact_helper_addrs));
 
-   /* 22  */
-   VGOFF_(helperc_LOADV4)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) );
-   /* 23  */
-   VGOFF_(helperc_LOADV1)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) );
-
-   /* 24  */
-   VGOFF_(handle_esp_assignment)
-      = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) );
-
-   /* 25 */
+   /* (9/10 or 18/19) + n_compact_helpers */
    VGOFF_(m_eip) = alloc_BaB(1);
 
    /* There are currently 24 spill slots */
-   /* 26 .. 49  This overlaps the magic boundary at >= 32 words, but
-      most spills are to low numbered spill slots, so the ones above
-      the boundary don't see much action. */
+   /* (11+/20+ .. 32+/43+) + n_compact_helpers.  This can overlap the magic
+    * boundary at >= 32 words, but most spills are to low numbered spill
+    * slots, so the ones above the boundary don't see much action. */
    VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS);
 
-   /* These two pushed beyond the boundary because 2-byte transactions
-      are rare. */
-   /* 50  */
-   VGOFF_(helperc_STOREV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) );
-   /* 51  */
-   VGOFF_(helperc_LOADV2)
-      = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) );
-
-   /* 52  */
-   VGOFF_(fpu_write_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) );
-   /* 53  */
-   VGOFF_(fpu_read_check)
-      = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) );
-
-   /* Actually I don't think these two are ever used. */
-   /* 54  */ 
-   VGOFF_(helper_value_check2_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) );
-   /* 55  */ 
-   VGOFF_(helper_value_check1_fail)
-      = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) );
-
-   /* I gave up counting at this point.  Since they're way above the
+   /* I gave up counting at this point.  Since they're above the
       short-amode-boundary, there's no point. */
 
    VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W);
@@ -303,6 +295,31 @@
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAS) );
    VGOFF_(helper_DAA)
       = alloc_BaB_1_set( (Addr) & VG_(helper_DAA) );
+
+   /* Allocate slots for non-compact helpers */
+   assign_helpers_in_baseBlock(VG_(n_noncompact_helpers), 
+                               VG_(noncompact_helper_offsets), 
+                               VG_(noncompact_helper_addrs));
+}
+
+static void vg_init_shadow_regs ( void )
+{
+   if (VG_(needs).shadow_regs) {
+      UInt eflags;
+   
+      SK_(written_shadow_regs_values) ( & VG_(written_shadow_reg), & eflags );
+      VG_(baseBlock)[VGOFF_(sh_esp)]    = 
+      VG_(baseBlock)[VGOFF_(sh_ebp)]    =
+      VG_(baseBlock)[VGOFF_(sh_eax)]    =
+      VG_(baseBlock)[VGOFF_(sh_ecx)]    =
+      VG_(baseBlock)[VGOFF_(sh_edx)]    =
+      VG_(baseBlock)[VGOFF_(sh_ebx)]    =
+      VG_(baseBlock)[VGOFF_(sh_esi)]    =
+      VG_(baseBlock)[VGOFF_(sh_edi)]    = VG_(written_shadow_reg);
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = eflags;
+
+   } else
+      VG_(written_shadow_reg) = VG_UNUSED_SHADOW_REG_VALUE;
 }
 
 
@@ -330,15 +347,17 @@
 /* 64-bit counter for the number of bbs to go before a debug exit. */
 ULong VG_(bbs_to_go);
 
-/* Produce debugging output? */
-Bool VG_(disassemble) = False;
-
 /* The current LRU epoch. */
 UInt VG_(current_epoch) = 0;
 
 /* This is the ThreadId of the last thread the scheduler ran. */
 ThreadId VG_(last_run_tid) = 0;
 
+/* This is the argument to __NR_exit() supplied by the first thread to
+   call that syscall.  We eventually pass that to __NR_exit() for
+   real. */
+UInt VG_(exitcode) = 0;
+
 
 /* ---------------------------------------------------------------------
    Counters, for informational purposes only.
@@ -396,46 +415,111 @@
 
 
 /* ---------------------------------------------------------------------
+   Skin data structure initialisation
+   ------------------------------------------------------------------ */
+
+/* Init with default values. */
+VgNeeds VG_(needs) = {
+   .name                    = NULL,
+   .description             = NULL,
+
+   .core_errors             = False,
+   .skin_errors             = False,
+   .run_libc_freeres        = False,
+
+   .sizeof_shadow_block     = 0,
+
+   .basic_block_discards    = False,
+   .shadow_regs             = False,
+   .command_line_options    = False,
+   .client_requests         = False,
+   .extended_UCode          = False,
+   .syscall_wrapper         = False,
+   .alternative_free        = False,
+   .sanity_checks           = False,
+};
+
+VgTrackEvents VG_(track_events) = {
+   /* Memory events */
+   .new_mem_startup       = NULL,
+   .new_mem_heap          = NULL,
+   .new_mem_stack         = NULL,
+   .new_mem_stack_aligned = NULL,
+   .new_mem_stack_signal  = NULL,
+   .new_mem_brk           = NULL,
+   .new_mem_mmap          = NULL,
+
+   .copy_mem_heap         = NULL,
+   .change_mem_mprotect   = NULL,
+
+   .ban_mem_heap          = NULL,
+   .ban_mem_stack         = NULL,
+
+   .die_mem_heap          = NULL,
+   .die_mem_stack         = NULL,
+   .die_mem_stack_aligned = NULL,
+   .die_mem_stack_signal  = NULL,
+   .die_mem_brk           = NULL,
+   .die_mem_munmap        = NULL,
+
+   .bad_free              = NULL,
+   .mismatched_free       = NULL,
+
+   .pre_mem_read          = NULL,
+   .pre_mem_read_asciiz   = NULL,
+   .pre_mem_write         = NULL,
+   .post_mem_write        = NULL,
+
+   /* Mutex events */
+   .post_mutex_lock       = NULL,
+   .post_mutex_unlock     = NULL,
+};
+
+static void sanity_check_needs ( void )
+{
+#define CHECK_NOT(var, value)                                     \
+   if ((var)==(value)) {                                          \
+      VG_(printf)("\n`%s' not initialised\n", VG__STRING(var));   \
+      VG_(skin_error)("Uninitialised needs field\n");             \
+   }
+   
+   CHECK_NOT(VG_(needs).name,        NULL);
+   CHECK_NOT(VG_(needs).description, NULL);
+
+#undef CHECK_NOT
+#undef INVALID_Bool
+}
+
+/* ---------------------------------------------------------------------
    Values derived from command-line options.
    ------------------------------------------------------------------ */
 
-Bool   VG_(clo_error_limit);
-Bool   VG_(clo_check_addrVs);
-Bool   VG_(clo_GDB_attach);
-Int    VG_(sanity_level);
-Int    VG_(clo_verbosity);
-Bool   VG_(clo_demangle);
-Bool   VG_(clo_leak_check);
-Bool   VG_(clo_show_reachable);
-Int    VG_(clo_leak_resolution);
-Bool   VG_(clo_sloppy_malloc);
-Int    VG_(clo_alignment);
-Bool   VG_(clo_partial_loads_ok);
-Bool   VG_(clo_trace_children);
-Int    VG_(clo_logfile_fd);
-Int    VG_(clo_freelist_vol);
-Bool   VG_(clo_workaround_gcc296_bugs);
-Int    VG_(clo_n_suppressions);
+/* Define, and set defaults. */
+Bool   VG_(clo_error_limit)    = True;
+Bool   VG_(clo_GDB_attach)     = False;
+Int    VG_(sanity_level)       = 1;
+Int    VG_(clo_verbosity)      = 1;
+Bool   VG_(clo_demangle)       = True;
+Bool   VG_(clo_sloppy_malloc)  = False;
+Int    VG_(clo_alignment)      = 4;
+Bool   VG_(clo_trace_children) = False;
+Int    VG_(clo_logfile_fd)     = 2;
+Int    VG_(clo_n_suppressions) = 0;
 Char*  VG_(clo_suppressions)[VG_CLO_MAX_SFILES];
-Bool   VG_(clo_single_step);
-Bool   VG_(clo_optimise);
-Bool   VG_(clo_instrument);
-Bool   VG_(clo_cleanup);
-Bool   VG_(clo_cachesim);
-cache_t VG_(clo_I1_cache);
-cache_t VG_(clo_D1_cache);
-cache_t VG_(clo_L2_cache);
-Int    VG_(clo_smc_check);
-Bool   VG_(clo_trace_syscalls);
-Bool   VG_(clo_trace_signals);
-Bool   VG_(clo_trace_symtab);
-Bool   VG_(clo_trace_malloc);
-Bool   VG_(clo_trace_sched);
-Int    VG_(clo_trace_pthread_level);
-ULong  VG_(clo_stop_after);
-Int    VG_(clo_dump_error);
-Int    VG_(clo_backtrace_size);
-Char*  VG_(clo_weird_hacks);
+Bool   VG_(clo_profile)        = False;
+Bool   VG_(clo_single_step)    = False;
+Bool   VG_(clo_optimise)       = True;
+UChar  VG_(clo_trace_codegen)  = 0; // 00000000b
+Bool   VG_(clo_trace_syscalls) = False;
+Bool   VG_(clo_trace_signals)  = False;
+Bool   VG_(clo_trace_symtab)   = False;
+Bool   VG_(clo_trace_malloc)   = False;
+Bool   VG_(clo_trace_sched)    = False;
+Int    VG_(clo_trace_pthread_level) = 0;
+ULong  VG_(clo_stop_after)     = 1000000000000LL;
+Int    VG_(clo_dump_error)     = 0;
+Int    VG_(clo_backtrace_size) = 4;
+Char*  VG_(clo_weird_hacks)    = NULL;
 
 /* This Bool is needed by wrappers in vg_clientmalloc.c to decide how
    to behave.  Initially we say False. */
@@ -454,12 +538,11 @@
    don't have to modify the original. */
 static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN];
 
-
 /* ---------------------------------------------------------------------
    Processing of command-line options.
    ------------------------------------------------------------------ */
 
-static void bad_option ( Char* opt )
+void VG_(bad_option) ( Char* opt )
 {
    VG_(shutdown_logging)();
    VG_(clo_logfile_fd) = 2; /* stderr */
@@ -487,91 +570,85 @@
    config_error("couldn't find client's argc/argc/envp");
 }   
 
-static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len )
+static void usage ( void )
 {
-   int   i1, i2, i3;
-   int   i;
-   char *opt = VG_(strdup)(VG_AR_PRIVATE, orig_opt);
+   Char* usage1 = 
+"usage: valgrind [options] prog-and-args\n"
+"\n"
+"  core user options, with defaults in [ ], are:\n"
+"    --help                    show this message\n"
+"    --version                 show version\n"
+"    --skin=<name>             main task (skin to use) [Valgrind]\n"
+"    -q --quiet                run silently; only print error msgs\n"
+"    -v --verbose              be more verbose, incl counts of errors\n"
+"    --gdb-attach=no|yes       start GDB when errors detected? [no]\n"
+"    --demangle=no|yes         automatically demangle C++ names? [yes]\n"
+"    --num-callers=<number>    show <num> callers in stack traces [4]\n"
+"    --error-limit=no|yes      stop showing new errors if too many? [yes]\n"
+"    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]\n"
+"    --alignment=<number>      set minimum alignment of allocations [4]\n"
+"    --trace-children=no|yes   Valgrind-ise child processes? [no]\n"
+"    --logfile-fd=<number>     file descriptor for messages [2=stderr]\n"
+"    --suppressions=<filename> suppress errors described in\n"
+"                              suppressions file <filename>\n"
+"    --weird-hacks=hack1,hack2,...  [no hacks selected]\n"
+"         recognised hacks are: ioctl-VTIME truncate-writes\n"
+"\n"
+"  %s skin user options:\n";
 
-   i = i1 = opt_len;
 
-   /* Option looks like "--I1=65536,2,64".
-    * Find commas, replace with NULs to make three independent 
-    * strings, then extract numbers.  Yuck. */
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i2 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if (',' == opt[i]) {
-      opt[i++] = '\0';
-      i3 = i;
-   } else goto bad;
-   while (VG_(isdigit)(opt[i])) i++;
-   if ('\0' != opt[i]) goto bad;
+   Char* usage2 = 
+"\n"
+"  core options for debugging Valgrind itself are:\n"
+"    --sanity-level=<number>   level of sanity checking to do [1]\n"
+"    --single-step=no|yes      translate each instr separately? [no]\n"
+"    --optimise=no|yes         improve intermediate code? [yes]\n"
+"    --profile=no|yes          profile? (skin must be built for it) [no]\n"
+"    --trace-codegen=<XXXXX>   show generated code? (X = 0|1) [00000]\n"
+"    --trace-syscalls=no|yes   show all system calls? [no]\n"
+"    --trace-signals=no|yes    show signal handling details? [no]\n"
+"    --trace-symtab=no|yes     show symbol table details? [no]\n"
+"    --trace-malloc=no|yes     show client malloc details? [no]\n"
+"    --trace-sched=no|yes      show thread scheduler details? [no]\n"
+"    --trace-pthread=none|some|all  show pthread event details? [no]\n"
+"    --stop-after=<number>     switch to real CPU after executing\n"
+"                              <number> basic blocks [infinity]\n"
+"    --dump-error=<number>     show translation for basic block\n"
+"                              associated with <number>'th\n"
+"                              error context [0=don't show any]\n"
+"\n"
+"  Extra options are read from env variable $VALGRIND_OPTS\n"
+"\n"
+"  Valgrind is Copyright (C) 2000-2002 Julian Seward\n"
+"  and licensed under the GNU General Public License, version 2.\n"
+"  Bug reports, feedback, admiration, abuse, etc, to: %s.\n"
+"\n";
 
-   cache->size      = (Int)VG_(atoll)(opt + i1);
-   cache->assoc     = (Int)VG_(atoll)(opt + i2);
-   cache->line_size = (Int)VG_(atoll)(opt + i3);
+   VG_(printf)(usage1, VG_(needs).name);
+   /* Don't print skin string directly for security, ha! */
+   if (VG_(needs).command_line_options)
+      VG_(printf)("%s", SK_(usage)());
+   else
+      VG_(printf)("    (none)\n");
+   VG_(printf)(usage2, VG_EMAIL_ADDR);
 
-   VG_(free)(VG_AR_PRIVATE, opt);
-   return;
-
-  bad:    
-   bad_option(orig_opt);
+   VG_(shutdown_logging)();
+   VG_(clo_logfile_fd) = 2; /* stderr */
+   VG_(exit)(1);
 }
 
 static void process_cmd_line_options ( void )
 {
-   UChar* argv[M_VG_CMDLINE_OPTS];
-   UInt   argc;
-   UChar* p;
-   UChar* str;
-   Int    i, eventually_logfile_fd, ctr;
+   Char* argv[M_VG_CMDLINE_OPTS];
+   UInt  argc;
+   Char* p;
+   Char* str;
+   Int   i, eventually_logfile_fd, ctr;
 
 #  define ISSPACE(cc)      ((cc) == ' ' || (cc) == '\t' || (cc) == '\n')
 #  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
 #  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
 
-   /* Set defaults. */
-   VG_(clo_error_limit)      = True;
-   VG_(clo_check_addrVs)     = True;
-   VG_(clo_GDB_attach)       = False;
-   VG_(sanity_level)         = 1;
-   VG_(clo_verbosity)        = 1;
-   VG_(clo_demangle)         = True;
-   VG_(clo_leak_check)       = False;
-   VG_(clo_show_reachable)   = False;
-   VG_(clo_leak_resolution)  = 2;
-   VG_(clo_sloppy_malloc)    = False;
-   VG_(clo_alignment)        = 4;
-   VG_(clo_partial_loads_ok) = True;
-   VG_(clo_trace_children)   = False;
-   VG_(clo_logfile_fd)       = 2; /* stderr */
-   VG_(clo_freelist_vol)     = 1000000;
-   VG_(clo_workaround_gcc296_bugs) = False;
-   VG_(clo_n_suppressions)   = 0;
-   VG_(clo_single_step)      = False;
-   VG_(clo_optimise)         = True;
-   VG_(clo_instrument)       = True;
-   VG_(clo_cachesim)         = False;
-   VG_(clo_I1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_D1_cache)         = UNDEFINED_CACHE;
-   VG_(clo_L2_cache)         = UNDEFINED_CACHE;
-   VG_(clo_cleanup)          = True;
-   VG_(clo_smc_check)        = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE;
-   VG_(clo_trace_syscalls)   = False;
-   VG_(clo_trace_signals)    = False;
-   VG_(clo_trace_symtab)     = False;
-   VG_(clo_trace_malloc)     = False;
-   VG_(clo_trace_sched)      = False;
-   VG_(clo_trace_pthread_level) = 0;
-   VG_(clo_stop_after)       = 1000000000000LL;
-   VG_(clo_dump_error)       = 0;
-   VG_(clo_backtrace_size)   = 4;
-   VG_(clo_weird_hacks)      = NULL;
-
    eventually_logfile_fd = VG_(clo_logfile_fd);
 
    /* Once logging is started, we can safely send messages pertaining
@@ -603,7 +680,10 @@
        if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
                                   VG_STARTUP_STACK_BASE_3 )) {
           sp = (UInt*)VG_STARTUP_STACK_BASE_3;
- 
+       } else 
+       if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), 
+                                  VG_STARTUP_STACK_BASE_4 )) {
+          sp = (UInt*)VG_STARTUP_STACK_BASE_4;
        } else {
           args_grok_error(
              "startup %esp is not near any VG_STARTUP_STACK_BASE_*\n   "
@@ -723,7 +803,7 @@
 
    for (i = 0; i < argc; i++) {
 
-      if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
+      if      (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose"))
          VG_(clo_verbosity)++;
       else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet"))
          VG_(clo_verbosity)--;
@@ -733,11 +813,6 @@
       else if (STREQ(argv[i], "--error-limit=no"))
          VG_(clo_error_limit) = False;
 
-      else if (STREQ(argv[i], "--check-addrVs=yes"))
-         VG_(clo_check_addrVs) = True;
-      else if (STREQ(argv[i], "--check-addrVs=no"))
-         VG_(clo_check_addrVs) = False;
-
       else if (STREQ(argv[i], "--gdb-attach=yes"))
          VG_(clo_GDB_attach) = True;
       else if (STREQ(argv[i], "--gdb-attach=no"))
@@ -748,28 +823,6 @@
       else if (STREQ(argv[i], "--demangle=no"))
          VG_(clo_demangle) = False;
 
-      else if (STREQ(argv[i], "--partial-loads-ok=yes"))
-         VG_(clo_partial_loads_ok) = True;
-      else if (STREQ(argv[i], "--partial-loads-ok=no"))
-         VG_(clo_partial_loads_ok) = False;
-
-      else if (STREQ(argv[i], "--leak-check=yes"))
-         VG_(clo_leak_check) = True;
-      else if (STREQ(argv[i], "--leak-check=no"))
-         VG_(clo_leak_check) = False;
-
-      else if (STREQ(argv[i], "--show-reachable=yes"))
-         VG_(clo_show_reachable) = True;
-      else if (STREQ(argv[i], "--show-reachable=no"))
-         VG_(clo_show_reachable) = False;
-
-      else if (STREQ(argv[i], "--leak-resolution=low"))
-         VG_(clo_leak_resolution) = 2;
-      else if (STREQ(argv[i], "--leak-resolution=med"))
-         VG_(clo_leak_resolution) = 4;
-      else if (STREQ(argv[i], "--leak-resolution=high"))
-         VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE;
-
       else if (STREQ(argv[i], "--sloppy-malloc=yes"))
          VG_(clo_sloppy_malloc) = True;
       else if (STREQ(argv[i], "--sloppy-malloc=no"))
@@ -783,32 +836,27 @@
       else if (STREQ(argv[i], "--trace-children=no"))
          VG_(clo_trace_children) = False;
 
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes"))
-         VG_(clo_workaround_gcc296_bugs) = True;
-      else if (STREQ(argv[i], "--workaround-gcc296-bugs=no"))
-         VG_(clo_workaround_gcc296_bugs) = False;
-
       else if (STREQN(15, argv[i], "--sanity-level="))
          VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]);
 
       else if (STREQN(13, argv[i], "--logfile-fd="))
          eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]);
 
-      else if (STREQN(15, argv[i], "--freelist-vol=")) {
-         VG_(clo_freelist_vol) = (Int)VG_(atoll)(&argv[i][15]);
-         if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2;
-      }
-
       else if (STREQN(15, argv[i], "--suppressions=")) {
          if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) {
-            VG_(message)(Vg_UserMsg, "Too many logfiles specified.");
+            VG_(message)(Vg_UserMsg, "Too many suppression files specified.");
             VG_(message)(Vg_UserMsg, 
                          "Increase VG_CLO_MAX_SFILES and recompile.");
-            bad_option(argv[i]);
+            VG_(bad_option)(argv[i]);
          }
          VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15];
          VG_(clo_n_suppressions)++;
       }
+      else if (STREQ(argv[i], "--profile=yes"))
+         VG_(clo_profile) = True;
+      else if (STREQ(argv[i], "--profile=no"))
+         VG_(clo_profile) = False;
+
       else if (STREQ(argv[i], "--single-step=yes"))
          VG_(clo_single_step) = True;
       else if (STREQ(argv[i], "--single-step=no"))
@@ -819,35 +867,26 @@
       else if (STREQ(argv[i], "--optimise=no"))
          VG_(clo_optimise) = False;
 
-      else if (STREQ(argv[i], "--instrument=yes"))
-         VG_(clo_instrument) = True;
-      else if (STREQ(argv[i], "--instrument=no"))
-         VG_(clo_instrument) = False;
-
-      else if (STREQ(argv[i], "--cleanup=yes"))
-         VG_(clo_cleanup) = True;
-      else if (STREQ(argv[i], "--cleanup=no"))
-         VG_(clo_cleanup) = False;
-
-      else if (STREQ(argv[i], "--cachesim=yes"))
-         VG_(clo_cachesim) = True;     
-      else if (STREQ(argv[i], "--cachesim=no"))
-         VG_(clo_cachesim) = False;
-
-      /* 5 is length of "--I1=" */
-      else if (0 == VG_(strncmp)(argv[i], "--I1=",    5))
-         parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--D1=",    5))
-         parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5);
-      else if (0 == VG_(strncmp)(argv[i], "--L2=",    5))
-         parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5);
-
-      else if (STREQ(argv[i], "--smc-check=none"))
-         VG_(clo_smc_check) = VG_CLO_SMC_NONE;
-      else if (STREQ(argv[i], "--smc-check=some"))
-         VG_(clo_smc_check) = VG_CLO_SMC_SOME;
-      else if (STREQ(argv[i], "--smc-check=all"))
-         VG_(clo_smc_check) = VG_CLO_SMC_ALL;
+      /* "vwxyz" --> 000zyxwv (binary) */
+      else if (STREQN(16, argv[i], "--trace-codegen=")) {
+         Int j;
+         char* opt = & argv[i][16];
+   
+         if (5 != VG_(strlen)(opt)) {
+            VG_(message)(Vg_UserMsg, 
+                         "--trace-codegen argument must have 5 digits");
+            VG_(bad_option)(argv[i]);
+         }
+         for (j = 0; j < 5; j++) {
+            if      ('0' == opt[j]) { /* do nothing */ }
+            else if ('1' == opt[j]) VG_(clo_trace_codegen) |= (1 << j);
+            else {
+               VG_(message)(Vg_UserMsg, "--trace-codegen argument can only "
+                                        "contain 0s and 1s");
+               VG_(bad_option)(argv[i]);
+            }
+         }
+      }
 
       else if (STREQ(argv[i], "--trace-syscalls=yes"))
          VG_(clo_trace_syscalls) = True;
@@ -899,8 +938,13 @@
             VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE;
       }
 
+      else if (VG_(needs).command_line_options) {
+         Bool ok = SK_(process_cmd_line_option)(argv[i]);
+         if (!ok)
+            usage();
+      }
       else
-         bad_option(argv[i]);
+         usage();
    }
 
 #  undef ISSPACE
@@ -917,7 +961,7 @@
       VG_(message)(Vg_UserMsg, 
          "Invalid --alignment= setting.  "
          "Should be a power of 2, >= 4, <= 4096.");
-      bad_option("--alignment");
+      VG_(bad_option)("--alignment");
    }
 
    if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) {
@@ -926,26 +970,14 @@
          "--gdb-attach=yes conflicts with --trace-children=yes");
       VG_(message)(Vg_UserMsg, 
          "Please choose one or the other, but not both.");
-      bad_option("--gdb-attach=yes and --trace-children=yes");
+      VG_(bad_option)("--gdb-attach=yes and --trace-children=yes");
    }
 
    VG_(clo_logfile_fd) = eventually_logfile_fd;
 
-   /* Don't do memory checking if simulating the cache. */
-   if (VG_(clo_cachesim)) {
-       VG_(clo_instrument) = False;
-   }
-
    if (VG_(clo_verbosity > 0)) {
-      if (VG_(clo_cachesim)) {
-         VG_(message)(Vg_UserMsg, 
-            "cachegrind-%s, an I1/D1/L2 cache profiler for x86 GNU/Linux.",
-            VERSION);
-      } else {
-         VG_(message)(Vg_UserMsg, 
-            "valgrind-%s, a memory error detector for x86 GNU/Linux.",
-            VERSION);
-      }
+      VG_(message)(Vg_UserMsg, "%s-%s, %s for x86 GNU/Linux.",
+         VG_(needs).name, VERSION, VG_(needs).description);
    }
 
    if (VG_(clo_verbosity > 0))
@@ -958,12 +990,12 @@
       }
    }
 
-   if (VG_(clo_n_suppressions) == 0 && !VG_(clo_cachesim)) {
+   if (VG_(clo_n_suppressions) == 0 && 
+       (VG_(needs).core_errors || VG_(needs).skin_errors)) {
       config_error("No error-suppression files were specified.");
    }
 }
 
-
 /* ---------------------------------------------------------------------
    Copying to/from m_state_static.
    ------------------------------------------------------------------ */
@@ -1015,11 +1047,40 @@
          = VG_(m_state_static)[40/4 + i];
 }
 
+Addr VG_(get_stack_pointer) ( void )
+{
+   return VG_(baseBlock)[VGOFF_(m_esp)];
+}
+
+/* Some random tests needed for leak checking */
+
+Bool VG_(within_stack)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(stack)))
+       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack)))
+      return True;
+   else
+      return False;
+}
+
+Bool VG_(within_m_state_static)(Addr a)
+{
+   if (a >= ((Addr)(&VG_(m_state_static)))
+       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static)))
+      return True;
+   else
+      return False;
+}
 
 /* ---------------------------------------------------------------------
    Show accumulated counts.
    ------------------------------------------------------------------ */
 
+static __inline__ Int safe_idiv(Int a, Int b)
+{
+   return (b == 0 ? 0 : a / b);
+}
+
 static void vg_show_counts ( void )
 {
    VG_(message)(Vg_DebugMsg,
@@ -1027,13 +1088,17 @@
 		VG_(current_epoch),
                 VG_(number_of_lrus) );
    VG_(message)(Vg_DebugMsg,
-                "translate: new %d (%d -> %d), discard %d (%d -> %d).",
+                "translate: new     %d (%d -> %d; ratio %d:10)",
                 VG_(overall_in_count),
                 VG_(overall_in_osize),
                 VG_(overall_in_tsize),
+                safe_idiv(10*VG_(overall_in_tsize), VG_(overall_in_osize)));
+   VG_(message)(Vg_DebugMsg,
+                "           discard %d (%d -> %d; ratio %d:10).",
                 VG_(overall_out_count),
                 VG_(overall_out_osize),
-                VG_(overall_out_tsize) );
+                VG_(overall_out_tsize),
+                safe_idiv(10*VG_(overall_out_tsize), VG_(overall_out_osize)));
    VG_(message)(Vg_DebugMsg,
       " dispatch: %lu basic blocks, %d/%d sched events, %d tt_fast misses.", 
       VG_(bbs_done), VG_(num_scheduling_events_MAJOR), 
@@ -1050,6 +1115,7 @@
                 "   sanity: %d cheap, %d expensive checks.",
                 VG_(sanity_fast_count), 
                 VG_(sanity_slow_count) );
+   VG_(print_ccall_stats)();
 }
 
 
@@ -1072,21 +1138,32 @@
       VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321;
    }
 
-   /* Set up baseBlock offsets and copy the saved machine's state into
-      it. */
+   /* Setup stuff that depends on the skin.  Must be before:
+      - vg_init_baseBlock(): to register helpers
+      - process_cmd_line_options(): to register skin name and description,
+        and turn on/off 'command_line_options' need
+      - init_memory() (to setup memory event trackers).
+    */
+   SK_(pre_clo_init) ( & VG_(needs), & VG_(track_events) );
+   sanity_check_needs();
+
+   /* Set up baseBlock offsets and copy the saved machine's state into it. */
    vg_init_baseBlock();
    VG_(copy_m_state_static_to_baseBlock)();
+   vg_init_shadow_regs();
 
    /* Process Valgrind's command-line opts (from env var VG_OPTS). */
    process_cmd_line_options();
 
    /* Hook to delay things long enough so we can get the pid and
       attach GDB in another shell. */
-   if (0) { 
+#if 0
+   { 
       Int p, q;
       for (p = 0; p < 50000; p++)
          for (q = 0; q < 50000; q++) ;
    }
+#endif
 
    /* Initialise the scheduler, and copy the client's state from
       baseBlock into VG_(threads)[1].  This has to come before signal
@@ -1098,31 +1175,34 @@
    VG_(sigstartup_actions)();
 
    /* Perhaps we're profiling Valgrind? */
-#  ifdef VG_PROFILE
-   VGP_(init_profiling)();
-#  endif
+   if (VG_(clo_profile))
+      VGP_(init_profiling)();
 
    /* Start calibration of our RDTSC-based clock. */
    VG_(start_rdtsc_calibration)();
 
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      VGP_PUSHCC(VgpInitAudit);
-      VGM_(init_memory_audit)();
-      VGP_POPCC;
-   }
+   /* Do this here just to give rdtsc calibration more time */
+   SK_(post_clo_init)();
 
-   VGP_PUSHCC(VgpReadSyms);
-   VG_(read_symbols)();
-   VGP_POPCC;
+   /* Must come after SK_(init) so memory handler accompaniments (eg.
+    * shadow memory) can be setup ok */
+   VGP_PUSHCC(VgpInitMem);
+   VG_(init_memory)();
+   VGP_POPCC(VgpInitMem);
+
+   /* Read the list of errors to suppress.  This should be found in
+      the file specified by vg_clo_suppressions. */
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
+      VG_(load_suppressions)();
 
    /* End calibration of our RDTSC-based clock, leaving it as long as
       we can. */
    VG_(end_rdtsc_calibration)();
 
-   /* This should come after init_memory_audit; otherwise the latter
-      carefully sets up the permissions maps to cover the anonymous
-      mmaps for the translation table and translation cache, which
-      wastes > 20M of virtual address space. */
+   /* This should come after VG_(init_memory)(); otherwise the 
+      latter carefully sets up the permissions maps to cover the 
+      anonymous mmaps for the translation table and translation cache, 
+      which wastes > 20M of virtual address space. */
    VG_(init_tt_tc)();
 
    if (VG_(clo_verbosity) == 1) {
@@ -1132,26 +1212,18 @@
 
    /* Now it is safe for malloc et al in vg_clientmalloc.c to act
       instrumented-ly. */
-   VG_(running_on_simd_CPU) = True;
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 );
-      VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 );
-   }
-
-   if (VG_(clo_cachesim)) 
-      VG_(init_cachesim)();
-
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
 
    VG_(bbs_to_go) = VG_(clo_stop_after);
 
+
    /* Run! */
+   VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
    src = VG_(scheduler)();
-   VGP_POPCC;
+   VGP_POPCC(VgpSched);
+   VG_(running_on_simd_CPU) = False;
 
    if (VG_(clo_verbosity) > 0)
       VG_(message)(Vg_UserMsg, "");
@@ -1161,25 +1233,19 @@
         "Warning: pthread scheduler exited due to deadlock");
    }
 
-   if (VG_(clo_instrument)) {
+   if (VG_(needs).core_errors || VG_(needs).skin_errors)
       VG_(show_all_errors)();
-      VG_(clientmalloc_done)();
-      if (VG_(clo_verbosity) == 1) {
-         VG_(message)(Vg_UserMsg, 
-                      "For counts of detected errors, rerun with: -v");
-      }
-      if (VG_(clo_leak_check)) VG_(detect_memory_leaks)();
-   }
-   VG_(running_on_simd_CPU) = False;
 
-   if (VG_(clo_cachesim))
-      VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv));
+   SK_(fini)();
 
    VG_(do_sanity_checks)( True /*include expensive checks*/ );
 
    if (VG_(clo_verbosity) > 1)
       vg_show_counts();
 
+   if (VG_(clo_verbosity) > 2)
+      VG_(print_UInstr_histogram)();
+
    if (0) {
       VG_(message)(Vg_DebugMsg, "");
       VG_(message)(Vg_DebugMsg, 
@@ -1189,16 +1255,10 @@
       VG_(message)(Vg_DebugMsg, 
          "------ Valgrind's ExeContext management stats follow ------" );
       VG_(show_ExeContext_stats)();
-      VG_(message)(Vg_DebugMsg, 
-         "------ Valgrind's client block stats follow ---------------" );
-      VG_(show_client_block_stats)();
    }
  
-#  ifdef VG_PROFILE
-   VGP_(done_profiling)();
-#  endif
-
-   VG_(done_prof_mem)();
+   if (VG_(clo_profile))
+      VGP_(done_profiling)();
 
    VG_(shutdown_logging)();
 
@@ -1220,9 +1280,10 @@
                    && VG_(last_run_tid) < VG_N_THREADS);
          tst = & VG_(threads)[VG_(last_run_tid)];
          vg_assert(tst->status == VgTs_Runnable);
-         /* The thread's %EBX will hold the arg to exit(), so we just
-            do exit with that arg. */
-         VG_(exit)( tst->m_ebx );
+         /* The thread's %EBX at the time it did __NR_exit() will hold
+            the arg to __NR_exit(), so we just do __NR_exit() with
+            that arg. */
+         VG_(exit)( VG_(exitcode) );
          /* NOT ALIVE HERE! */
          VG_(panic)("entered the afterlife in vg_main() -- ExitSyscall");
          break; /* what the hell :) */
@@ -1267,6 +1328,10 @@
    tracing into child processes.  To make this work the build system
    also supplies a dummy file, "valgrinq.so". 
 
+   Also replace "vgskin_<foo>.so" with whitespace, for the same reason;
+   without it, child processes try to find valgrind.so symbols in the 
+   skin .so.
+
    Also look for $(libdir)/lib/valgrind in LD_LIBRARY_PATH and change
    it to $(libdir)/lib/valgrinq, so as to make our libpthread.so
    disappear.  
@@ -1274,20 +1339,22 @@
 void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
                                                 Char* ld_library_path_str )
 {
-   Char* p_prel = NULL;
-   Char* p_path = NULL;
-   Int   what = 0;
+   Char* p_prel  = NULL;
+   Char* sk_prel = NULL;
+   Char* p_path  = NULL;
+   Int   what    = 0;
    if (ld_preload_str == NULL || ld_library_path_str == NULL)
       goto mutancy;
 
    /* VG_(printf)("%s %s\n", ld_preload_str, ld_library_path_str); */
 
    p_prel = VG_(strstr)(ld_preload_str, "valgrind.so");
+   sk_prel = VG_(strstr)(ld_preload_str, "vgskin_");
    p_path = VG_(strstr)(ld_library_path_str, VG_LIBDIR);
 
+   what = 1;
    if (p_prel == NULL) {
       /* perhaps already happened? */
-      what = 1;
       if (VG_(strstr)(ld_preload_str, "valgrinq.so") == NULL)
          goto mutancy;
       if (VG_(strstr)(ld_library_path_str, "lib/valgrinq") == NULL)
@@ -1296,10 +1363,30 @@
    }
 
    what = 2;
+   if (sk_prel == NULL) goto mutancy;
+
+   what = 3;
    if (p_path == NULL) goto mutancy;
 
+   what = 4;
+   {  
+      /* Blank from "vgskin_" back to prev. LD_PRELOAD entry, or start */
+      Char* p = sk_prel;
+      while (*p != ':' && p > ld_preload_str) { 
+         *p = ' ';
+         p--;
+      }
+      /* Blank from "vgskin_" to next LD_PRELOAD entry */
+      while (*p != ':' && *p != '\0') { 
+         *p = ' ';
+         p++;
+      }
+      if (*p == '\0') goto mutancy;    /* valgrind.so has disappeared?! */
+      *p = ' ';                        /* blank ending ':' */
+   }
+
    /* in LD_PRELOAD, turn valgrind.so into valgrinq.so. */
-   what = 3;
+   what = 5;
    if (p_prel[7] != 'd') goto mutancy;
    p_prel[7] = 'q';
 
@@ -1307,10 +1394,10 @@
       .../lib/valgrind .../lib/valgrinq, which doesn't exist,
       so that our own libpthread.so goes out of scope. */
    p_path += VG_(strlen)(VG_LIBDIR);
-   what = 4;
+   what = 6;
    if (p_path[0] != '/') goto mutancy;
    p_path++; /* step over / */
-   what = 5;
+   what = 7;
    if (p_path[7] != 'd') goto mutancy;
    p_path[7] = 'q';
    return;
@@ -1406,6 +1493,70 @@
 }
 
 
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* A fast sanity check -- suitable for calling circa once per
+   millisecond. */
+
+void VG_(do_sanity_checks) ( Bool force_expensive )
+{
+   Int          i;
+
+   if (VG_(sanity_level) < 1) return;
+
+   /* --- First do all the tests that we can do quickly. ---*/
+
+   VG_(sanity_fast_count)++;
+
+   /* Check that we haven't overrun our private stack. */
+   for (i = 0; i < 10; i++) {
+      vg_assert(VG_(stack)[i]
+                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
+      vg_assert(VG_(stack)[10000-1-i] 
+                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
+   }
+
+   /* Check stuff pertaining to the memory check system. */
+
+   /* Check that nobody has spuriously claimed that the first or
+      last 16 pages of memory have become accessible [...] */
+   if (VG_(needs).sanity_checks)
+      vg_assert(SK_(cheap_sanity_check)());
+
+   /* --- Now some more expensive checks. ---*/
+
+   /* Once every 25 times, check some more expensive stuff. */
+   if ( force_expensive
+     || VG_(sanity_level) > 1
+     || (VG_(sanity_level) == 1 && (VG_(sanity_fast_count) % 25) == 0)) {
+
+      VG_(sanity_slow_count)++;
+
+#     if 0
+      { void zzzmemscan(void); zzzmemscan(); }
+#     endif
+
+      if ((VG_(sanity_fast_count) % 250) == 0)
+         VG_(sanity_check_tc_tt)();
+
+      if (VG_(needs).sanity_checks) {
+          vg_assert(SK_(expensive_sanity_check)());
+      }
+      /* 
+      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
+      */
+   }
+
+   if (VG_(sanity_level) > 1) {
+      /* Check sanity of the low-level memory manager.  Note that bugs
+         in the client's code can cause this to fail, so we don't do
+         this check unless specially asked for.  And because it's
+         potentially very expensive. */
+      VG_(mallocSanityCheckAll)();
+   }
+}
 /*--------------------------------------------------------------------*/
 /*--- end                                                vg_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/vg_malloc2.c b/vg_malloc2.c
index 87f580d..92358c1 100644
--- a/vg_malloc2.c
+++ b/vg_malloc2.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -178,13 +178,14 @@
 /* It is important that this library is self-initialising, because it
    may get called very early on -- as a result of C++ static
    constructor initialisations -- before Valgrind itself is
-   initialised.  Hence vg_malloc() and vg_free() below always call
-   ensure_mm_init() to ensure things are correctly initialised.  */
+   initialised.  Hence VG_(arena_malloc)() and VG_(arena_free)() below always
+   call ensure_mm_init() to ensure things are correctly initialised.  */
 
 static
 void ensure_mm_init ( void )
 {
    static Bool init_done = False;
+
    if (init_done) return;
 
    /* Use a checked red zone size of 1 word for our internal stuff,
@@ -194,22 +195,28 @@
       which merely checks at the time of freeing that the red zone
       words are unchanged. */
 
-   arena_init ( &vg_arena[VG_AR_PRIVATE], "private ", 
+   arena_init ( &vg_arena[VG_AR_CORE],      "core    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_SYMTAB],  "symtab  ", 
+   arena_init ( &vg_arena[VG_AR_SKIN],      "skin    ", 
                 1, True, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_CLIENT],  "client  ",  
+   arena_init ( &vg_arena[VG_AR_SYMTAB],    "symtab  ", 
+                1, True, 262144 );
+
+   arena_init ( &vg_arena[VG_AR_JITTER],    "JITter  ", 
+                1, True, 8192 );
+
+   arena_init ( &vg_arena[VG_AR_CLIENT],    "client  ",  
                 VG_AR_CLIENT_REDZONE_SZW, False, 262144 );
 
-   arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle",  
+   arena_init ( &vg_arena[VG_AR_DEMANGLE],  "demangle",  
                 4 /*paranoid*/, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_EXECTXT],  "exectxt ",  
+   arena_init ( &vg_arena[VG_AR_EXECTXT],   "exectxt ",  
                 1, True, 16384 );
 
-   arena_init ( &vg_arena[VG_AR_ERRCTXT],  "errctxt ",  
+   arena_init ( &vg_arena[VG_AR_ERRORS],    "errors  ",  
                 1, True, 16384 );
 
    arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien",  
@@ -692,7 +699,7 @@
 
 
 /* Sanity check both the superblocks and the chains. */
-void VG_(mallocSanityCheckArena) ( ArenaId aid )
+static void mallocSanityCheckArena ( ArenaId aid )
 {
    Int         i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li;
    Int         blockctr_sb_free, listno, list_min_pszW, list_max_pszW;
@@ -703,7 +710,7 @@
    UInt        arena_bytes_on_loan;
    Arena*      a;
 
-#  define BOMB VG_(panic)("vg_mallocSanityCheckArena")
+#  define BOMB VG_(panic)("mallocSanityCheckArena")
 
    a = arenaId_to_ArenaP(aid);
    
@@ -722,15 +729,15 @@
          b     = &sb->payload_words[i];
          b_bszW = get_bszW_lo(b);
          if (!blockSane(a, b)) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "BAD\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        " BAD\n",
                          sb, i, b_bszW );
             BOMB;
          }
          thisFree = !is_inuse_bszW(b_bszW);
          if (thisFree && lastWasFree) {
-            VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): "
-                         "UNMERGED FREES\n",
+            VG_(printf)("mallocSanityCheckArena: sb %p, block %d (bszW %d): "
+                        "UNMERGED FREES\n",
                          sb, i, b_bszW );
             BOMB;
          }
@@ -741,7 +748,7 @@
          i += mk_plain_bszW(b_bszW);
       }
       if (i > sb->n_payload_words) {
-         VG_(printf)( "mallocSanityCheck: sb %p: last block "
+         VG_(printf)( "mallocSanityCheckArena: sb %p: last block "
                       "overshoots end\n", sb);
          BOMB;
       }
@@ -750,7 +757,7 @@
 
    if (arena_bytes_on_loan != a->bytes_on_loan) {
             VG_(printf)( 
-                    "mallocSanityCheck: a->bytes_on_loan %d, "
+                    "mallocSanityCheckArena: a->bytes_on_loan %d, "
                     "arena_bytes_on_loan %d: "
                     "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
       ppSuperblocks(a);
@@ -770,7 +777,7 @@
          b_prev = b;
          b = get_next_p(b);
          if (get_prev_p(b) != b_prev) {
-            VG_(printf)( "mallocSanityCheck: list %d at %p: "
+            VG_(printf)( "mallocSanityCheckArena: list %d at %p: "
                          "BAD LINKAGE\n", 
                          listno, b );
             BOMB;
@@ -778,7 +785,7 @@
          b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b)));
          if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) {
             VG_(printf)( 
-               "mallocSanityCheck: list %d at %p: "
+               "mallocSanityCheckArena: list %d at %p: "
                "WRONG CHAIN SIZE %d (%d, %d)\n", 
                listno, b, b_pszW, list_min_pszW, list_max_pszW );
             BOMB;
@@ -790,7 +797,7 @@
 
    if (blockctr_sb_free != blockctr_li) {
       VG_(printf)( 
-         "mallocSanityCheck: BLOCK COUNT MISMATCH "
+         "mallocSanityCheckArena: BLOCK COUNT MISMATCH "
          "(via sbs %d, via lists %d)\n",
          blockctr_sb_free, blockctr_li );
       ppSuperblocks(a);
@@ -813,7 +820,7 @@
 {
    Int i;
    for (i = 0; i < VG_N_ARENAS; i++)
-      VG_(mallocSanityCheckArena) ( i );
+      mallocSanityCheckArena ( i );
 }
 
 
@@ -828,6 +835,7 @@
    Superblock* sb;
    WordF*      b;
    Int         b_bszW;
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
    for (sb = a->sblocks; sb != NULL; sb = sb->next) {
@@ -845,10 +853,10 @@
 
 
 /*------------------------------------------------------------*/
-/*--- Externally-visible functions.                        ---*/
+/*--- Core-visible functions.                              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(malloc) ( ArenaId aid, Int req_pszB )
+void* VG_(arena_malloc) ( ArenaId aid, Int req_pszB )
 {
    Int         req_pszW, req_bszW, frag_bszW, b_bszW, lno;
    Superblock* new_sb;
@@ -943,15 +951,15 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
    return first_to_payload(a, b);
 }
 
  
-void VG_(free) ( ArenaId aid, void* ptr )
+void VG_(arena_free) ( ArenaId aid, void* ptr )
 {
    Superblock* sb;
    UInt*       sb_payl_firstw;
@@ -966,8 +974,11 @@
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
-   if (ptr == NULL) return;
-
+   if (ptr == NULL) {
+      VGP_POPCC(VgpMalloc);
+      return;
+   }
+      
    ch = payload_to_first(a, ptr);
 
 #  ifdef DEBUG_MALLOC
@@ -1026,10 +1037,10 @@
    }
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
-   VGP_POPCC;
+   VGP_POPCC(VgpMalloc);
 }
 
 
@@ -1065,13 +1076,15 @@
    .    .               .   .   .               .   .
 
 */
-void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
+void* VG_(arena_malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB )
 {
    Int    req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW;
    Word   *base_b, *base_p, *align_p;
    UInt   saved_bytes_on_loan;
    Arena* a;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1091,7 +1104,7 @@
          break;
       default:
          VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", 
-                     a, req_pszB, req_alignB );
+                     a, req_alignB, req_pszB );
          VG_(panic)("vg_malloc_aligned");
          /*NOTREACHED*/
    }
@@ -1112,7 +1125,7 @@
    /* Payload ptr for the block we are going to split.  Note this
       changes a->bytes_on_loan; we save and restore it ourselves. */
    saved_bytes_on_loan = a->bytes_on_loan;
-   base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
+   base_p = VG_(arena_malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD );
    a->bytes_on_loan = saved_bytes_on_loan;
 
    /* Block ptr for the block we are going to split. */
@@ -1163,9 +1176,11 @@
       a->bytes_on_loan_max = a->bytes_on_loan;
 
 #  ifdef DEBUG_MALLOC
-   VG_(mallocSanityCheckArena)(aid);
+   mallocSanityCheckArena(aid);
 #  endif
 
+   VGP_POPCC(VgpMalloc);
+
    return align_p;
 }
 
@@ -1174,25 +1189,34 @@
 /*--- Services layered on top of malloc/free.              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes )
+void* VG_(arena_calloc) ( ArenaId aid, Int nmemb, Int nbytes )
 {
    Int    i, size;
    UChar* p;
+
+   VGP_PUSHCC(VgpMalloc);
+
    size = nmemb * nbytes;
    vg_assert(size >= 0);
-   p = VG_(malloc) ( aid, size );
+   p = VG_(arena_malloc) ( aid, size );
    for (i = 0; i < size; i++) p[i] = 0;
+
+   VGP_POPCC(VgpMalloc);
+   
    return p;
 }
 
 
-void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB )
+void* VG_(arena_realloc) ( ArenaId aid, void* ptr, 
+                          Int req_alignB, Int req_pszB )
 {
    Arena* a;
    Int    old_bszW, old_pszW, old_pszB, i;
    UChar  *p_old, *p_new;
    UInt*  ch;
 
+   VGP_PUSHCC(VgpMalloc);
+
    ensure_mm_init();
    a = arenaId_to_ArenaP(aid);
 
@@ -1208,19 +1232,60 @@
    old_pszW = bszW_to_pszW(a, old_bszW);
    old_pszB = old_pszW * VKI_BYTES_PER_WORD;
 
-   if (req_pszB <= old_pszB) return ptr;
+   if (req_pszB <= old_pszB) {
+      VGP_POPCC(VgpMalloc);
+      return ptr;
+   }
 
-   p_new = VG_(malloc) ( aid, req_pszB );
+   if (req_alignB == 4)
+      p_new = VG_(arena_malloc) ( aid, req_pszB );
+   else
+      p_new = VG_(arena_malloc_aligned) ( aid, req_alignB, req_pszB );
+
    p_old = (UChar*)ptr;
    for (i = 0; i < old_pszB; i++)
       p_new[i] = p_old[i];
 
-   VG_(free)(aid, p_old);
+   VG_(arena_free)(aid, p_old);
+
+   VGP_POPCC(VgpMalloc);
    return p_new;
 }
 
 
 /*------------------------------------------------------------*/
+/*--- Skin-visible functions.                              ---*/
+/*------------------------------------------------------------*/
+
+/* All just wrappers to avoid exposing arenas to skins */
+
+void* VG_(malloc) ( Int nbytes )
+{
+   return VG_(arena_malloc) ( VG_AR_SKIN, nbytes );
+}
+
+void  VG_(free) ( void* ptr )
+{
+   VG_(arena_free) ( VG_AR_SKIN, ptr );
+}
+
+void* VG_(calloc) ( Int nmemb, Int nbytes )
+{
+   return VG_(arena_calloc) ( VG_AR_SKIN, nmemb, nbytes );
+}
+
+void* VG_(realloc) ( void* ptr, Int size )
+{
+   return VG_(arena_realloc) ( VG_AR_SKIN, ptr, /*alignment*/4, size );
+}
+
+void* VG_(malloc_aligned) ( Int req_alignB, Int req_pszB )
+{
+   return VG_(arena_malloc_aligned) ( VG_AR_SKIN, req_alignB, req_pszB );
+}
+
+
+/*------------------------------------------------------------*/
 /*--- The original test driver machinery.                  ---*/
 /*------------------------------------------------------------*/
 
@@ -1243,7 +1308,7 @@
 {
    Int i, j, k, nbytes, qq;
    unsigned char* chp;
-   Arena* a = &arena[VG_AR_PRIVATE];
+   Arena* a = &arena[VG_AR_CORE];
    srandom(1);
    for (i = 0; i < N_TEST_ARR; i++)
       test_arr[i] = NULL;
diff --git a/vg_memcheck.c b/vg_memcheck.c
new file mode 100644
index 0000000..4ee380f
--- /dev/null
+++ b/vg_memcheck.c
@@ -0,0 +1,2428 @@
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: Maintain bitmaps of memory,       ---*/
+/*--- tracking the accessibility (A) and validity (V) status of    ---*/
+/*--- each byte.                                                   ---*/
+/*---                                                vg_memcheck.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+#include "vg_memcheck.h"   /* for client requests */
+//#include "vg_profile.c"
+
+/* Define to debug the mem audit system. */
+/* #define VG_DEBUG_MEMORY */
+
+/* Define to debug the memory-leak-detector. */
+/* #define VG_DEBUG_LEAKCHECK */
+
+/* Define to collect detailed performance info. */
+/* #define VG_PROFILE_MEMORY */
+
+#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
+
+/*------------------------------------------------------------*/
+/*--- Command line options                                 ---*/
+/*------------------------------------------------------------*/
+
+Bool  SK_(clo_partial_loads_ok)       = True;
+Int   SK_(clo_freelist_vol)           = 1000000;
+Bool  SK_(clo_leak_check)             = False;
+VgRes SK_(clo_leak_resolution)        = Vg_LowRes;
+Bool  SK_(clo_show_reachable)         = False;
+Bool  SK_(clo_workaround_gcc296_bugs) = False;
+Bool  SK_(clo_check_addrVs)           = True;
+Bool  SK_(clo_cleanup)                = True;
+
+/*------------------------------------------------------------*/
+/*--- Profiling events                                     ---*/
+/*------------------------------------------------------------*/
+
+typedef 
+   enum { 
+      VgpCheckMem = VgpFini+1,
+      VgpSetMem
+   } 
+   VgpSkinCC;
+
+/*------------------------------------------------------------*/
+/*--- Low-level support for memory checking.               ---*/
+/*------------------------------------------------------------*/
+
+/* All reads and writes are checked against a memory map, which
+   records the state of all memory in the process.  The memory map is
+   organised like this:
+
+   The top 16 bits of an address are used to index into a top-level
+   map table, containing 65536 entries.  Each entry is a pointer to a
+   second-level map, which records the accesibililty and validity
+   permissions for the 65536 bytes indexed by the lower 16 bits of the
+   address.  Each byte is represented by nine bits, one indicating
+   accessibility, the other eight validity.  So each second-level map
+   contains 73728 bytes.  This two-level arrangement conveniently
+   divides the 4G address space into 64k lumps, each size 64k bytes.
+
+   All entries in the primary (top-level) map must point to a valid
+   secondary (second-level) map.  Since most of the 4G of address
+   space will not be in use -- ie, not mapped at all -- there is a
+   distinguished secondary map, which indicates `not addressable and
+   not valid' writeable for all bytes.  Entries in the primary map for
+   which the entire 64k is not in use at all point at this
+   distinguished map.
+
+   [...] lots of stuff deleted due to out of date-ness
+
+   As a final optimisation, the alignment and address checks for
+   4-byte loads and stores are combined in a neat way.  The primary
+   map is extended to have 262144 entries (2^18), rather than 2^16.
+   The top 3/4 of these entries are permanently set to the
+   distinguished secondary map.  For a 4-byte load/store, the
+   top-level map is indexed not with (addr >> 16) but instead f(addr),
+   where
+
+    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
+        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
+        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
+
+   ie the lowest two bits are placed above the 16 high address bits.
+   If either of these two bits are nonzero, the address is misaligned;
+   this will select a secondary map from the upper 3/4 of the primary
+   map.  Because this is always the distinguished secondary map, a
+   (bogus) address check failure will result.  The failure handling
+   code can then figure out whether this is a genuine addr check
+   failure or whether it is a possibly-legitimate access at a
+   misaligned address.  
+*/
+
+
+/*------------------------------------------------------------*/
+/*--- Crude profiling machinery.                           ---*/
+/*------------------------------------------------------------*/
+
+#ifdef VG_PROFILE_MEMORY
+
+#define N_PROF_EVENTS 150
+
+static UInt event_ctr[N_PROF_EVENTS];
+
+static void init_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++)
+      event_ctr[i] = 0;
+}
+
+static void done_prof_mem ( void )
+{
+   Int i;
+   for (i = 0; i < N_PROF_EVENTS; i++) {
+      if ((i % 10) == 0) 
+         VG_(printf)("\n");
+      if (event_ctr[i] > 0)
+         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+   }
+   VG_(printf)("\n");
+}
+
+#define PROF_EVENT(ev)                                  \
+   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
+        event_ctr[ev]++;                                \
+   } while (False);
+
+#else
+
+static void init_prof_mem ( void ) { }
+static void done_prof_mem ( void ) { }
+
+#define PROF_EVENT(ev) /* */
+
+#endif
+
+/* Event index.  If just the name of the fn is given, this means the
+   number of calls to the fn.  Otherwise it is the specified event.
+
+   10   alloc_secondary_map
+
+   20   get_abit
+   21   get_vbyte
+   22   set_abit
+   23   set_vbyte
+   24   get_abits4_ALIGNED
+   25   get_vbytes4_ALIGNED
+
+   30   set_address_range_perms
+   31   set_address_range_perms(lower byte loop)
+   32   set_address_range_perms(quadword loop)
+   33   set_address_range_perms(upper byte loop)
+   
+   35   make_noaccess
+   36   make_writable
+   37   make_readable
+
+   40   copy_address_range_state
+   41   copy_address_range_state(byte loop)
+   42   check_writable
+   43   check_writable(byte loop)
+   44   check_readable
+   45   check_readable(byte loop)
+   46   check_readable_asciiz
+   47   check_readable_asciiz(byte loop)
+
+   50   make_aligned_word_NOACCESS
+   51   make_aligned_word_WRITABLE
+
+   60   helperc_LOADV4
+   61   helperc_STOREV4
+   62   helperc_LOADV2
+   63   helperc_STOREV2
+   64   helperc_LOADV1
+   65   helperc_STOREV1
+
+   70   rim_rd_V4_SLOWLY
+   71   rim_wr_V4_SLOWLY
+   72   rim_rd_V2_SLOWLY
+   73   rim_wr_V2_SLOWLY
+   74   rim_rd_V1_SLOWLY
+   75   rim_wr_V1_SLOWLY
+
+   80   fpu_read
+   81   fpu_read aligned 4
+   82   fpu_read aligned 8
+   83   fpu_read 2
+   84   fpu_read 10
+
+   85   fpu_write
+   86   fpu_write aligned 4
+   87   fpu_write aligned 8
+   88   fpu_write 2
+   89   fpu_write 10
+
+   90   fpu_read_check_SLOWLY
+   91   fpu_read_check_SLOWLY(byte loop)
+   92   fpu_write_check_SLOWLY
+   93   fpu_write_check_SLOWLY(byte loop)
+
+   100  is_plausible_stack_addr
+   101  handle_esp_assignment
+   102  handle_esp_assignment(-4)
+   103  handle_esp_assignment(+4)
+   104  handle_esp_assignment(-12)
+   105  handle_esp_assignment(-8)
+   106  handle_esp_assignment(+16)
+   107  handle_esp_assignment(+12)
+   108  handle_esp_assignment(0)
+   109  handle_esp_assignment(+8)
+   110  handle_esp_assignment(-16)
+   111  handle_esp_assignment(+20)
+   112  handle_esp_assignment(-20)
+   113  handle_esp_assignment(+24)
+   114  handle_esp_assignment(-24)
+
+   120  vg_handle_esp_assignment_SLOWLY
+   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
+   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
+   123  vg_handle_esp_assignment_SLOWLY(normal)
+   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
+*/
+
+/*------------------------------------------------------------*/
+/*--- Function declarations.                               ---*/
+/*------------------------------------------------------------*/
+
+static UInt vgmext_rd_V4_SLOWLY ( Addr a );
+static UInt vgmext_rd_V2_SLOWLY ( Addr a );
+static UInt vgmext_rd_V1_SLOWLY ( Addr a );
+static void vgmext_wr_V4_SLOWLY ( Addr a, UInt vbytes );
+static void vgmext_wr_V2_SLOWLY ( Addr a, UInt vbytes );
+static void vgmext_wr_V1_SLOWLY ( Addr a, UInt vbytes );
+static void fpu_read_check_SLOWLY ( Addr addr, Int size );
+static void fpu_write_check_SLOWLY ( Addr addr, Int size );
+
+/*------------------------------------------------------------*/
+/*--- Data defns.                                          ---*/
+/*------------------------------------------------------------*/
+
+typedef 
+   struct {
+      UChar abits[8192];
+      UChar vbyte[65536];
+   }
+   SecMap;
+
+static SecMap* primary_map[ /*65536*/ 262144 ];
+static SecMap  distinguished_secondary_map;
+
+#define IS_DISTINGUISHED_SM(smap) \
+   ((smap) == &distinguished_secondary_map)
+
+#define ENSURE_MAPPABLE(addr,caller)                                   \
+   do {                                                                \
+      if (IS_DISTINGUISHED_SM(primary_map[(addr) >> 16])) {       \
+         primary_map[(addr) >> 16] = alloc_secondary_map(caller); \
+         /* VG_(printf)("new 2map because of %p\n", addr); */          \
+      }                                                                \
+   } while(0)
+
+#define BITARR_SET(aaa_p,iii_p)                         \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] |= (1 << (iii & 7));                \
+   } while (0)
+
+#define BITARR_CLEAR(aaa_p,iii_p)                       \
+   do {                                                 \
+      UInt   iii = (UInt)iii_p;                         \
+      UChar* aaa = (UChar*)aaa_p;                       \
+      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
+   } while (0)
+
+#define BITARR_TEST(aaa_p,iii_p)                        \
+      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
+               & (1 << (((UInt)iii_p) & 7))))           \
+
+
+#define VGM_BIT_VALID      0
+#define VGM_BIT_INVALID    1
+
+#define VGM_NIBBLE_VALID   0
+#define VGM_NIBBLE_INVALID 0xF
+
+#define VGM_BYTE_VALID     0
+#define VGM_BYTE_INVALID   0xFF
+
+#define VGM_WORD_VALID     0
+#define VGM_WORD_INVALID   0xFFFFFFFF
+
+#define VGM_EFLAGS_VALID   0xFFFFFFFE
+#define VGM_EFLAGS_INVALID 0xFFFFFFFF     /* not used */
+
+
+static void init_shadow_memory ( void )
+{
+   Int i;
+
+   for (i = 0; i < 8192; i++)             /* Invalid address */
+      distinguished_secondary_map.abits[i] = VGM_BYTE_INVALID; 
+   for (i = 0; i < 65536; i++)            /* Invalid Value */
+      distinguished_secondary_map.vbyte[i] = VGM_BYTE_INVALID; 
+
+   /* These entries gradually get overwritten as the used address
+      space expands. */
+   for (i = 0; i < 65536; i++)
+      primary_map[i] = &distinguished_secondary_map;
+
+   /* These ones should never change; it's a bug in Valgrind if they do. */
+   for (i = 65536; i < 262144; i++)
+      primary_map[i] = &distinguished_secondary_map;
+}
+
+void SK_(post_clo_init) ( void )
+{
+}
+
+void SK_(fini) ( void )
+{
+   VG_(print_malloc_stats)();
+
+   if (VG_(clo_verbosity) == 1) {
+      if (!SK_(clo_leak_check))
+         VG_(message)(Vg_UserMsg, 
+             "For a detailed leak analysis,  rerun with: --leak-check=yes");
+
+      VG_(message)(Vg_UserMsg, 
+                   "For counts of detected errors, rerun with: -v");
+   }
+   if (SK_(clo_leak_check)) SK_(detect_memory_leaks)();
+
+   done_prof_mem();
+
+   if (0) {
+      VG_(message)(Vg_DebugMsg, 
+        "------ Valgrind's client block stats follow ---------------" );
+      SK_(show_client_block_stats)();
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Basic bitmap management, reading and writing.        ---*/
+/*------------------------------------------------------------*/
+
+/* Allocate and initialise a secondary map. */
+
+/* Allocate a fresh SecMap (via mmap) and initialise it so every byte
+   is unaddressible with invalid V bits.  'caller' is only used for
+   diagnostics inside VG_(get_memory_from_mmap). */
+static SecMap* alloc_secondary_map ( __attribute__ ((unused)) 
+                                     Char* caller )
+{
+   SecMap* map;
+   UInt  i;
+   PROF_EVENT(10);
+
+   /* Mark all bytes as invalid access and invalid value. */
+
+   /* It just happens that a SecMap occupies exactly 18 pages --
+      although this isn't important, so the following assert is
+      spurious. */
+   vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
+   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
+
+   for (i = 0; i < 8192; i++)
+      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
+   for (i = 0; i < 65536; i++)
+      map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */
+
+   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
+   return map;
+}
+
+
+/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
+
+/* Read the addressibility (A) bit for byte 'a'.  Returns
+   VGM_BIT_VALID or VGM_BIT_INVALID.  Note the A bitmap stores 1 for
+   "invalid", hence the inversion of BITARR_TEST's result. */
+static __inline__ UChar get_abit ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(20);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (A)map! 0x%x\n", a);
+#  endif
+   return BITARR_TEST(sm->abits, sm_off) 
+             ? VGM_BIT_INVALID : VGM_BIT_VALID;
+}
+
+/* Read the validity (V) byte shadowing byte 'a'. */
+static __inline__ UChar get_vbyte ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(21);
+#  if 0
+      if (IS_DISTINGUISHED_SM(sm))
+         VG_(message)(Vg_DebugMsg, 
+                      "accessed distinguished 2ndary (V)map! 0x%x\n", a);
+#  endif
+   return sm->vbyte[sm_off];
+}
+
+/* Write the A bit for byte 'a'.  Nonzero 'abit' marks the byte
+   unaddressible (sets the bit); zero marks it addressible.  Ensures a
+   writable SecMap exists first, so this never hits the distinguished
+   (shared, must-not-change) secondary map. */
+static __inline__ void set_abit ( Addr a, UChar abit )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(22);
+   ENSURE_MAPPABLE(a, "set_abit");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   if (abit) 
+      BITARR_SET(sm->abits, sm_off);
+   else
+      BITARR_CLEAR(sm->abits, sm_off);
+}
+
+/* Write the V byte shadowing byte 'a', allocating a private SecMap
+   for the 64K region first if needed. */
+static __inline__ void set_vbyte ( Addr a, UChar vbyte )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   PROF_EVENT(23);
+   ENSURE_MAPPABLE(a, "set_vbyte");
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   sm->vbyte[sm_off] = vbyte;
+}
+
+
+/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
+
+/* Fetch the 4 A bits covering the aligned word at 'a', packed into
+   the low nibble of the result.  'a' must be 4-aligned (only asserted
+   when VG_DEBUG_MEMORY is on). */
+static __inline__ UChar get_abits4_ALIGNED ( Addr a )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   abits8;
+   PROF_EVENT(24);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   sm     = primary_map[a >> 16];
+   sm_off = a & 0xFFFF;
+   abits8 = sm->abits[sm_off >> 3];
+   /* Select the upper or lower nibble of the A byte, depending on
+      which word of the 8-byte group 'a' refers to. */
+   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+   abits8 &= 0x0F;
+   return abits8;
+}
+
+/* Fetch the 4 V bytes covering the aligned word at 'a' in one 32-bit
+   read.  'a' must be 4-aligned (asserted only under VG_DEBUG_MEMORY). */
+static UInt __inline__ get_vbytes4_ALIGNED ( Addr a )
+{
+   SecMap* sm     = primary_map[a >> 16];
+   UInt    sm_off = a & 0xFFFF;
+   PROF_EVENT(25);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+#  endif
+   return ((UInt*)(sm->vbyte))[sm_off >> 2];
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setting permissions over address ranges.             ---*/
+/*------------------------------------------------------------*/
+
+/* Core of SK_(make_{noaccess,writable,readable}): set the A bit of
+   every byte in [a, a+len) to example_a_bit and its V byte to the
+   byte-expansion of example_v_bit.  Works byte-at-a-time at the
+   unaligned edges and 8 bytes per iteration once 8-aligned.  Under
+   VG_DEBUG_MEMORY the whole range is done bytewise instead. */
+static void set_address_range_perms ( Addr a, UInt len, 
+                                      UInt example_a_bit,
+                                      UInt example_v_bit )
+{
+   UChar   vbyte, abyte8;
+   UInt    vword4, sm_off;
+   SecMap* sm;
+
+   PROF_EVENT(30);
+
+   if (len == 0)
+      return;
+
+   if (len > 100 * 1000 * 1000) {
+      VG_(message)(Vg_UserMsg, 
+                   "Warning: set address range perms: "
+                   "large range %u, a %d, v %d",
+                   len, example_a_bit, example_v_bit );
+   }
+
+   VGP_PUSHCC(VgpSetMem);
+
+   /* Requests to change permissions of huge address ranges may
+      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
+      far all legitimate requests have fallen beneath that size. */
+   /* 4 Mar 02: this is just stupid; get rid of it. */
+   /* vg_assert(len < 30000000); */
+
+   /* Check the permissions make sense. */
+   vg_assert(example_a_bit == VGM_BIT_VALID 
+             || example_a_bit == VGM_BIT_INVALID);
+   vg_assert(example_v_bit == VGM_BIT_VALID 
+             || example_v_bit == VGM_BIT_INVALID);
+   /* An unaddressible byte can never hold valid data. */
+   if (example_a_bit == VGM_BIT_INVALID)
+      vg_assert(example_v_bit == VGM_BIT_INVALID);
+
+   /* The validity bits to write. */
+   vbyte = example_v_bit==VGM_BIT_VALID 
+              ? VGM_BYTE_VALID : VGM_BYTE_INVALID;
+
+   /* In order that we can charge through the address space at 8
+      bytes/main-loop iteration, make up some perms: the A bit
+      replicated to a full byte, the V byte replicated to a word. */
+   abyte8 = (example_a_bit << 7)
+            | (example_a_bit << 6)
+            | (example_a_bit << 5)
+            | (example_a_bit << 4)
+            | (example_a_bit << 3)
+            | (example_a_bit << 2)
+            | (example_a_bit << 1)
+            | (example_a_bit << 0);
+   vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte;
+
+#  ifdef VG_DEBUG_MEMORY
+   /* Do it ... */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }
+
+#  else
+   /* Slowly do parts preceding 8-byte alignment. */
+   while (True) {
+      PROF_EVENT(31);
+      if (len == 0) break;
+      if ((a % 8) == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }   
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0);
+
+   /* Once aligned, go fast: one A byte and two V words per 8 bytes. */
+   while (True) {
+      PROF_EVENT(32);
+      if (len < 8) break;
+      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
+      sm = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      sm->abits[sm_off >> 3] = abyte8;
+      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4;
+      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4;
+      a += 8;
+      len -= 8;
+   }
+
+   if (len == 0) {
+      VGP_POPCC(VgpSetMem);
+      return;
+   }
+   vg_assert((a % 8) == 0 && len > 0 && len < 8);
+
+   /* Finish the upper fragment. */
+   while (True) {
+      PROF_EVENT(33);
+      if (len == 0) break;
+      set_abit ( a, example_a_bit );
+      set_vbyte ( a, vbyte );
+      a++;
+      len--;
+   }   
+#  endif
+
+   /* Check that zero page and highest page have not been written to
+      -- this could happen with buggy syscall wrappers.  Today
+      (2001-04-26) had precisely such a problem with __NR_setitimer. */
+   vg_assert(SK_(cheap_sanity_check)());
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Set permissions for address ranges ... */
+
+/* Mark [a, a+len) unaddressible (and hence also invalid-valued). */
+void SK_(make_noaccess) ( Addr a, UInt len )
+{
+   PROF_EVENT(35);
+   DEBUG("SK_(make_noaccess)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) addressible but with undefined contents. */
+void SK_(make_writable) ( Addr a, UInt len )
+{
+   PROF_EVENT(36);
+   DEBUG("SK_(make_writable)(%p, %x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
+}
+
+/* Mark [a, a+len) addressible with fully defined contents. */
+void SK_(make_readable) ( Addr a, UInt len )
+{
+   PROF_EVENT(37);
+   DEBUG("SK_(make_readable)(%p, 0x%x)\n", a, len);
+   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
+}
+
+/* Block-copy permissions (needed for implementing realloc()). */
+
+/* Copy the A/V shadow state of [src, src+len) to [dst, dst+len),
+   byte by byte.  Ranges are assumed not to require any special
+   overlap handling beyond this forward copy. */
+static void copy_address_range_state ( Addr src, Addr dst, UInt len )
+{
+   UInt i;
+
+   DEBUG("copy_address_range_state\n");
+
+   PROF_EVENT(40);
+   for (i = 0; i < len; i++) {
+      UChar abit  = get_abit ( src+i );
+      UChar vbyte = get_vbyte ( src+i );
+      PROF_EVENT(41);
+      set_abit ( dst+i, abit );
+      set_vbyte ( dst+i, vbyte );
+   }
+}
+
+
+/* Check permissions for address range.  If inadequate permissions
+   exist, *bad_addr is set to the offending address, so the caller can
+   know what it is. */
+
+/* True iff every byte of [a, a+len) is addressible.  On failure,
+   *bad_addr (if non-NULL) receives the first offending address. */
+Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+   PROF_EVENT(42);
+   for (i = 0; i < len; i++) {
+      PROF_EVENT(43);
+      abit = get_abit(a);
+      if (abit == VGM_BIT_INVALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+/* True iff every byte of [a, a+len) is both addressible and has
+   fully valid (defined) contents.  On failure, *bad_addr (if
+   non-NULL) receives the first offending address. */
+Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+{
+   UInt  i;
+   UChar abit;
+   UChar vbyte;
+
+   PROF_EVENT(44);
+   DEBUG("SK_(check_readable)\n");
+   for (i = 0; i < len; i++) {
+      abit  = get_abit(a);
+      vbyte = get_vbyte(a);
+      PROF_EVENT(45);
+      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      a++;
+   }
+   return True;
+}
+
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so. */
+
+/* Check a zero-terminated ascii string.  Tricky -- don't want to
+   examine the actual bytes, to find the end, until we're sure it is
+   safe to do so.  Each byte's shadow state is verified BEFORE the
+   byte itself is dereferenced to look for the terminating NUL. */
+Bool SK_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+{
+   UChar abit;
+   UChar vbyte;
+   PROF_EVENT(46);
+   DEBUG("SK_(check_readable_asciiz)\n");
+   while (True) {
+      PROF_EVENT(47);
+      abit  = get_abit(a);
+      vbyte = get_vbyte(a);
+      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
+         if (bad_addr != NULL) *bad_addr = a;
+         return False;
+      }
+      /* Ok, a is safe to read. */
+      if (* ((UChar*)a) == 0) return True;
+      a++;
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Memory event handlers                                ---*/
+/*------------------------------------------------------------*/
+
+/* Setting permissions for aligned words.  This supports fast stack
+   operations. */
+
+/* Fast path for stack-pointer moves: mark the 4-aligned range
+   [a, a+len) unaddressible, one whole word (4 A bits + 4 V bytes)
+   per iteration.  'a' and 'len' are assumed 4-aligned (asserted only
+   under VG_DEBUG_MEMORY). */
+static void make_noaccess_aligned ( Addr a, UInt len )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(50);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_noaccess_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (1s). */
+      sm->abits[sm_off >> 3] |= mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+/* Fast path for stack-pointer moves: mark the 4-aligned range
+   [a, a+len) addressible-but-undefined, one whole word per
+   iteration.  Mirror image of make_noaccess_aligned: it CLEARS the
+   A-bit nibble (valid) while still writing invalid V bits. */
+static void make_writable_aligned ( Addr a, UInt len )
+{
+   SecMap* sm;
+   UInt    sm_off;
+   UChar   mask;
+   Addr    a_past_end = a + len;
+
+   VGP_PUSHCC(VgpSetMem);
+
+   PROF_EVENT(51);
+#  ifdef VG_DEBUG_MEMORY
+   vg_assert(IS_ALIGNED4_ADDR(a));
+   vg_assert(IS_ALIGNED4_ADDR(len));
+#  endif
+
+   for ( ; a < a_past_end; a += 4) {
+      ENSURE_MAPPABLE(a, "make_writable_aligned");
+      sm     = primary_map[a >> 16];
+      sm_off = a & 0xFFFF;
+      ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
+      mask = 0x0F;
+      mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
+      /* mask now contains 1s where we wish to make address bits
+         invalid (0s). */
+      /* NOTE(review): the comment above looks copy-pasted from
+         make_noaccess_aligned -- here the masked A bits are CLEARED,
+         i.e. made valid/addressible, not invalid. */
+      sm->abits[sm_off >> 3] &= ~mask;
+   }
+   VGP_POPCC(VgpSetMem);
+}
+
+
+/* Core callback: verify a range the core is about to write on the
+   client's behalf (syscall out-params etc.) is addressible, and
+   report the appropriate error kind if not. */
+static
+void check_is_writable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   /* VG_(message)(Vg_DebugMsg,"check is writable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_writable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/True, s );
+         break;
+
+      case Vg_CorePThread:
+      case Vg_CoreSignal:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/True, s );
+         break;
+
+      default:
+         /* NOTE(review): panic text says "check_is_readable" but this
+            is check_is_writable -- misleading if it ever fires. */
+         VG_(panic)("check_is_readable: Unknown or unexpected CorePart");
+      }
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: verify a range the core is about to read on the
+   client's behalf is addressible and defined; report the error kind
+   matching the requesting core part if not. */
+static
+void check_is_readable ( CorePart part, ThreadState* tst,
+                         Char* s, UInt base, UInt size )
+{     
+   Bool ok;
+   Addr bad_addr;
+
+   VGP_PUSHCC(VgpCheckMem);
+   
+   /* VG_(message)(Vg_DebugMsg,"check is readable: %x .. %x",
+                               base,base+size-1); */
+   ok = SK_(check_readable) ( base, size, &bad_addr );
+   if (!ok) {
+      switch (part) {
+      case Vg_CoreSysCall:
+         SK_(record_param_error) ( tst, bad_addr, /*isWrite =*/False, s );
+         break;
+      
+      case Vg_CorePThread:
+         SK_(record_core_mem_error)( tst, /*isWrite=*/False, s );
+         break;
+
+      /* If we're being asked to jump to a silly address, record an error 
+         message before potentially crashing the entire system. */
+      case Vg_CoreTranslate:
+         SK_(record_jump_error)( tst, bad_addr );
+         break;
+
+      default:
+         VG_(panic)("check_is_readable: Unknown or unexpected CorePart");
+      }
+   }
+   VGP_POPCC(VgpCheckMem);
+}
+
+/* Core callback: verify a NUL-terminated string passed to a syscall
+   is readable.  Only ever called for syscall params (asserted). */
+static
+void check_is_readable_asciiz ( CorePart part, ThreadState* tst,
+                                Char* s, UInt str )
+{
+   Bool ok = True;
+   Addr bad_addr;
+   /* VG_(message)(Vg_DebugMsg,"check is readable asciiz: 0x%x",str); */
+
+   VGP_PUSHCC(VgpCheckMem);
+
+   vg_assert(part == Vg_CoreSysCall);
+   ok = SK_(check_readable_asciiz) ( (Addr)str, &bad_addr );
+   if (!ok) {
+      SK_(record_param_error) ( tst, bad_addr, /*is_writable =*/False, s );
+   }
+
+   VGP_POPCC(VgpCheckMem);
+}
+
+
+/* Event handler: memory mapped in at process startup.  Permission
+   flags are ignored; the whole range is marked readable. */
+static
+void memcheck_new_mem_startup( Addr a, UInt len, Bool rr, Bool ww, Bool xx )
+{
+   // JJJ: this ignores the permissions and just makes it readable, like the
+   // old code did, AFAICT
+   DEBUG("new_mem_startup(%p, %u, rr=%u, ww=%u, xx=%u)\n", a,len,rr,ww,xx);
+   SK_(make_readable)(a, len);
+}
+
+/* Event handler: new heap memory.  Initialised memory (e.g. calloc)
+   becomes readable; uninitialised (malloc) only writable. */
+static
+void memcheck_new_mem_heap ( Addr a, UInt len, Bool is_inited )
+{
+   if (is_inited) {
+      SK_(make_readable)(a, len);
+   } else {
+      SK_(make_writable)(a, len);
+   }
+}
+
+/* Event handler: permission change on a range (e.g. mprotect).
+   Readable wins over writable; neither means no-access.  The 'nn'
+   and 'xx' flags are not consulted. */
+static
+void memcheck_set_perms (Addr a, UInt len, 
+                         Bool nn, Bool rr, Bool ww, Bool xx)
+{
+   DEBUG("memcheck_set_perms(%p, %u, nn=%u, rr=%u ww=%u, xx=%u)\n",
+                             a, len, nn, rr, ww, xx);
+   if      (rr) SK_(make_readable)(a, len);
+   else if (ww) SK_(make_writable)(a, len);
+   else         SK_(make_noaccess)(a, len);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Functions called directly from generated code.       ---*/
+/*------------------------------------------------------------*/
+
+/* Swap the two 16-bit halves of x.  Used by the LOADV/STOREV fast
+   paths to fold alignment into the primary-map index. */
+static __inline__ UInt rotateRight16 ( UInt x )
+{
+   /* Amazingly, gcc turns this into a single rotate insn. */
+   return (x >> 16) | (x << 16);
+}
+
+
+/* Plain top-16-bits extraction, i.e. the ordinary primary-map index. */
+static __inline__ UInt shiftRight16 ( UInt x )
+{
+   return x >> 16;
+}
+
+
+/* Read/write 1/2/4 sized V bytes, and emit an address error if
+   needed. */
+
+/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast.
+   Under all other circumstances, it defers to the relevant _SLOWLY
+   function, which can handle all situations.
+*/
+/* Generated-code helper: load the 4 V bytes for a 4-byte read at 'a'.
+   The rotated index makes misaligned addresses select a
+   distinguished map entry (index >= 64K), forcing the slow path. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV4) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V4_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(60);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible. */
+      UInt v_off = a & 0xFFFF;
+      return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V4_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store the 4 V bytes for a 4-byte write at
+   'a'.  Same rotated-index trick as LOADV4 to shunt misaligned or
+   unaddressible cases to the slow path. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV4) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V4_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   UChar   abits  = sm->abits[a_off];
+   abits >>= (a & 4);
+   abits &= 15;
+   PROF_EVENT(61);
+   if (abits == VGM_NIBBLE_VALID) {
+      /* Handle common case quickly: a is suitably aligned, is mapped,
+         and is addressible. */
+      UInt v_off = a & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V4_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+/* Generated-code helper: load 2 V bytes for a 2-byte read at 'a';
+   unused upper half of the result is returned as all-invalid
+   (0xFFFF0000).  Fast path requires the whole surrounding 8-byte
+   group to be addressible. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV2) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V2_SLOWLY(a);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(62);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      return 0xFFFF0000 
+             |  
+             (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V2_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store 2 V bytes for a 2-byte write at 'a';
+   only the low 16 bits of 'vbytes' are used. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV2) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V2_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(63);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V2_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+/* Generated-code helper: load 1 V byte for a 1-byte read at 'a';
+   the unused upper 24 bits are returned all-invalid (0xFFFFFF00).
+   Byte accesses need no alignment trick, so a plain shift indexes
+   the primary map. */
+__attribute__ ((regparm(1)))
+UInt SK_(helperc_LOADV1) ( Addr a )
+{
+#  ifdef VG_DEBUG_MEMORY
+   return vgmext_rd_V1_SLOWLY(a);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(64);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      return 0xFFFFFF00
+             |
+             (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
+   } else {
+      /* Slow but general case. */
+      return vgmext_rd_V1_SLOWLY(a);
+   }
+#  endif
+}
+
+/* Generated-code helper: store 1 V byte for a 1-byte write at 'a';
+   only the low 8 bits of 'vbytes' are used. */
+__attribute__ ((regparm(2)))
+void SK_(helperc_STOREV1) ( Addr a, UInt vbytes )
+{
+#  ifdef VG_DEBUG_MEMORY
+   vgmext_wr_V1_SLOWLY(a, vbytes);
+#  else
+   UInt    sec_no = shiftRight16(a);
+   SecMap* sm     = primary_map[sec_no];
+   UInt    a_off  = (a & 0xFFFF) >> 3;
+   PROF_EVENT(65);
+   if (sm->abits[a_off] == VGM_BYTE_VALID) {
+      /* Handle common case quickly. */
+      UInt v_off = a & 0xFFFF;
+      ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
+   } else {
+      /* Slow but general case. */
+      vgmext_wr_V1_SLOWLY(a, vbytes);
+   }
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Fallback functions to handle cases that the above    ---*/
+/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage.           ---*/
+/*------------------------------------------------------------*/
+
+/* General fallback for a 4-byte V load: handles misalignment and
+   partially/fully unaddressible words, byte by byte (little-endian
+   assembly of the result word). */
+static UInt vgmext_rd_V4_SLOWLY ( Addr a )
+{
+   Bool a0ok, a1ok, a2ok, a3ok;
+   UInt vb0, vb1, vb2, vb3;
+
+   PROF_EVENT(70);
+
+   /* First establish independently the addressibility of the 4 bytes
+      involved. */
+   a0ok = get_abit(a+0) == VGM_BIT_VALID;
+   a1ok = get_abit(a+1) == VGM_BIT_VALID;
+   a2ok = get_abit(a+2) == VGM_BIT_VALID;
+   a3ok = get_abit(a+3) == VGM_BIT_VALID;
+
+   /* Also get the validity bytes for the address. */
+   vb0 = (UInt)get_vbyte(a+0);
+   vb1 = (UInt)get_vbyte(a+1);
+   vb2 = (UInt)get_vbyte(a+2);
+   vb3 = (UInt)get_vbyte(a+3);
+
+   /* Now distinguish 3 cases */
+
+   /* Case 1: the address is completely valid, so:
+      - no addressing error
+      - return V bytes as read from memory
+   */
+   if (a0ok && a1ok && a2ok && a3ok) {
+      UInt vw = VGM_WORD_INVALID;
+      vw <<= 8; vw |= vb3;
+      vw <<= 8; vw |= vb2;
+      vw <<= 8; vw |= vb1;
+      vw <<= 8; vw |= vb0;
+      return vw;
+   }
+
+   /* Case 2: the address is completely invalid.  
+      - emit addressing error
+      - return V word indicating validity.  
+      This sounds strange, but if we make loads from invalid addresses 
+      give invalid data, we also risk producing a number of confusing
+      undefined-value errors later, which confuses the fact that the
+      error arose in the first place from an invalid address. 
+   */
+   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
+   if (!SK_(clo_partial_loads_ok) 
+       || ((a & 3) != 0)
+       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
+      SK_(record_address_error)( a, 4, False );
+      return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) 
+             | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID;
+   }
+
+   /* Case 3: the address is partially valid.  
+      - no addressing error
+      - returned V word is invalid where the address is invalid, 
+        and contains V bytes from memory otherwise. 
+      Case 3 is only allowed if SK_(clo_partial_loads_ok) is True
+      (which is the default), and the address is 4-aligned.  
+      If not, Case 2 will have applied.
+   */
+   vg_assert(SK_(clo_partial_loads_ok));
+   {
+      UInt vw = VGM_WORD_INVALID;
+      vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID);
+      vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID);
+      return vw;
+   }
+}
+
+/* General fallback for a 4-byte V store: the V bytes are always
+   written; an address error is reported afterwards if any of the 4
+   target bytes was unaddressible. */
+static void vgmext_wr_V4_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(71);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+2) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+3) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+3, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 4, True );
+}
+
+/* General fallback for a 2-byte V load.  On an address error, the
+   low 16 bits of the result are forced valid (same rationale as the
+   4-byte case: avoid follow-on value errors). */
+static UInt vgmext_rd_V2_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   UInt vw   = VGM_WORD_INVALID;
+   Bool aerr = False;
+   PROF_EVENT(72);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* Fetch the V bytes, remembering to do it little-endian-ly. */
+   vw <<= 8; vw |= (UInt)get_vbyte(a+1);
+   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      SK_(record_address_error)( a, 2, False );
+      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
+           | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID);
+   }
+   return vw;   
+}
+
+/* General fallback for a 2-byte V store; reports an address error
+   after writing if either target byte was unaddressible. */
+static void vgmext_wr_V2_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(73);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
+   set_vbyte( a+1, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 2, True );
+}
+
+/* General fallback for a 1-byte V load.  On an address error, the
+   low 8 bits of the result are forced valid. */
+static UInt vgmext_rd_V1_SLOWLY ( Addr a )
+{
+   /* Check the address for validity. */
+   UInt vw   = VGM_WORD_INVALID;
+   Bool aerr = False;
+   PROF_EVENT(74);
+
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* Fetch the V byte. */
+   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
+
+   /* If an address error has happened, report it. */
+   if (aerr) {
+      SK_(record_address_error)( a, 1, False );
+      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
+           | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID);
+   }
+   return vw;   
+}
+
+/* General fallback for a 1-byte V store; reports an address error
+   after writing if the target byte was unaddressible. */
+static void vgmext_wr_V1_SLOWLY ( Addr a, UInt vbytes )
+{
+   /* Check the address for validity. */
+   Bool aerr = False;
+   PROF_EVENT(75);
+   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
+
+   /* Store the V bytes, remembering to do it little-endian-ly. */
+   set_vbyte( a+0, vbytes & 0x000000FF );
+
+   /* If an address error has happened, report it. */
+   if (aerr)
+      SK_(record_address_error)( a, 1, True );
+}
+
+
+/* ---------------------------------------------------------------------
+   Called from generated code, or from the assembly helpers.
+   Handlers for value check failures.
+   ------------------------------------------------------------------ */
+
+/* Value-check failure handlers, one per access size (0 = flag/other,
+   1/2/4 = bytes).  Each just records a value error of that size. */
+void SK_(helperc_value_check0_fail) ( void )
+{
+   SK_(record_value_error) ( 0 );
+}
+
+void SK_(helperc_value_check1_fail) ( void )
+{
+   SK_(record_value_error) ( 1 );
+}
+
+void SK_(helperc_value_check2_fail) ( void )
+{
+   SK_(record_value_error) ( 2 );
+}
+
+void SK_(helperc_value_check4_fail) ( void )
+{
+   SK_(record_value_error) ( 4 );
+}
+
+
+/* ---------------------------------------------------------------------
+   FPU load and store checks, called from generated code.
+   ------------------------------------------------------------------ */
+
+__attribute__ ((regparm(2)))
+void SK_(fpu_read_check) ( Addr addr, Int size )
+{
+   /* Ensure the read area is both addressible and valid (ie,
+      readable).  If there's an address error, don't report a value
+      error too; but if there isn't an address error, check for a
+      value error. 
+
+      Try to be reasonably fast on the common case; wimp out and defer
+      to fpu_read_check_SLOWLY for everything else.  */
+
+   SecMap* sm;
+   UInt    sm_off, v_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(80);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_read_check_SLOWLY ( addr, size );
+#  else
+
+   /* 4-byte fast path: requires 4-alignment, a fully addressible
+      8-byte group, and a fully valid V word. */
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(81);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible. */
+      v_off = addr & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow4;
+      /* Properly aligned, addressible and with valid data. */
+      return;
+     slow4:
+      fpu_read_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   /* 8-byte fast path: the same check applied to each 4-byte half. */
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(82);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible. */
+      v_off = addr & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow8;
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      v_off = addr4 & 0xFFFF;
+      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
+         goto slow8;
+      /* Both halves properly aligned, addressible and with valid
+         data. */
+      return;
+     slow8:
+      fpu_read_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(83);
+      fpu_read_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   if (size == 10) {
+      PROF_EVENT(84);
+      fpu_read_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(84); /* XXX assign correct event number */
+      /* NOTE(review): when size==108 only the first 28 bytes are
+         checked here -- bytes 28..107 go unchecked.  Looks like this
+         should pass 'size' rather than the literal 28; confirm
+         against later versions. */
+      fpu_read_check_SLOWLY ( addr, 28 );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("vgmext_fpu_read_check: unhandled size");
+#  endif
+}
+
+
+__attribute__ ((regparm(2)))
+void SK_(fpu_write_check) ( Addr addr, Int size )
+{
+   /* Ensure the written area is addressible, and moan if otherwise.
+      If it is addressible, make it valid, otherwise invalid. 
+   */
+
+   SecMap* sm;
+   UInt    sm_off, v_off, a_off;
+   Addr    addr4;
+
+   PROF_EVENT(85);
+
+#  ifdef VG_DEBUG_MEMORY
+   fpu_write_check_SLOWLY ( addr, size );
+#  else
+
+   /* 4-byte fast path: aligned and addressible -> mark the whole V
+      word valid in one store. */
+   if (size == 4) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
+      PROF_EVENT(86);
+      /* Properly aligned. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
+      /* Properly aligned and addressible.  Make valid. */
+      v_off = addr & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      return;
+     slow4:
+      fpu_write_check_SLOWLY ( addr, 4 );
+      return;
+   }
+
+   /* 8-byte fast path: same treatment for each 4-byte half. */
+   if (size == 8) {
+      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
+      PROF_EVENT(87);
+      /* Properly aligned.  Do it in two halves. */
+      addr4 = addr + 4;
+      /* First half. */
+      sm     = primary_map[addr >> 16];
+      sm_off = addr & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* First half properly aligned and addressible.  Make valid. */
+      v_off = addr & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      /* Second half. */
+      sm     = primary_map[addr4 >> 16];
+      sm_off = addr4 & 0xFFFF;
+      a_off  = sm_off >> 3;
+      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
+      /* Second half properly aligned and addressible. */
+      v_off = addr4 & 0xFFFF;
+      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
+      /* Properly aligned, addressible and with valid data. */
+      return;
+     slow8:
+      fpu_write_check_SLOWLY ( addr, 8 );
+      return;
+   }
+
+   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
+      cases go quickly.  */
+   if (size == 2) {
+      PROF_EVENT(88);
+      fpu_write_check_SLOWLY ( addr, 2 );
+      return;
+   }
+
+   if (size == 10) {
+      PROF_EVENT(89);
+      fpu_write_check_SLOWLY ( addr, 10 );
+      return;
+   }
+
+   if (size == 28 || size == 108) {
+      PROF_EVENT(89); /* XXX assign correct event number */
+      /* NOTE(review): as in fpu_read_check, size==108 only processes
+         the first 28 bytes -- bytes 28..107 are left unmarked.
+         Probably should pass 'size'; confirm against later versions. */
+      fpu_write_check_SLOWLY ( addr, 28 );
+      return;
+   }
+
+   VG_(printf)("size is %d\n", size);
+   VG_(panic)("vgmext_fpu_write_check: unhandled size");
+#  endif
+}
+
+
+/* ---------------------------------------------------------------------
+   Slow, general cases for FPU load and store checks.
+   ------------------------------------------------------------------ */
+
+/* Generic version.  Test for both addr and value errors, but if
+   there's an addr error, don't report a value error even if it
+   exists. */
+
+/* Generic version.  Test for both addr and value errors, but if
+   there's an addr error, don't report a value error even if it
+   exists.  Checks every byte of [addr, addr+size) individually. */
+void fpu_read_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Bool aerr = False;
+   Bool verr = False;
+   PROF_EVENT(90);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(91);
+      if (get_abit(addr+i) != VGM_BIT_VALID)
+         aerr = True;
+      if (get_vbyte(addr+i) != VGM_BYTE_VALID)
+         verr = True;
+   }
+
+   /* Address error suppresses the (less fundamental) value error. */
+   if (aerr) {
+      SK_(record_address_error)( addr, size, False );
+   } else {
+     if (verr)
+        SK_(record_value_error)( size );
+   }
+}
+
+
+/* Generic version.  Test for addr errors.  Valid addresses are
+   given valid values, and invalid addresses invalid values. */
+
+/* Generic version.  Test for addr errors.  Valid addresses are
+   given valid values, and invalid addresses invalid values; a single
+   address error is reported at the end if any byte was bad. */
+void fpu_write_check_SLOWLY ( Addr addr, Int size )
+{
+   Int  i;
+   Addr a_here;
+   Bool a_ok;
+   Bool aerr = False;
+   PROF_EVENT(92);
+   for (i = 0; i < size; i++) {
+      PROF_EVENT(93);
+      a_here = addr+i;
+      a_ok = get_abit(a_here) == VGM_BIT_VALID;
+      if (a_ok) {
+	set_vbyte(a_here, VGM_BYTE_VALID);
+      } else {
+	set_vbyte(a_here, VGM_BYTE_INVALID);
+        aerr = True;
+      }
+   }
+   if (aerr) {
+      SK_(record_address_error)( addr, size, True );
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Shadow chunks info                                   ---*/
+/*------------------------------------------------------------*/
+
+/* Stash an ExeContext in the chunk's first skin-private word.
+   (Cast through UInt: this code assumes 32-bit pointers, consistent
+   with the x86-only code elsewhere in the file.) */
+static __inline__
+void set_where( ShadowChunk* sc, ExeContext* ec )
+{
+   sc->skin_extra[0] = (UInt)ec;
+}
+
+/* Retrieve the ExeContext previously stored by set_where(). */
+static __inline__
+ExeContext *get_where( ShadowChunk* sc )
+{
+   return (ExeContext*)sc->skin_extra[0];
+}
+
+/* Core callback: record, in the chunk, the context in which thread
+   'tst' performed the allocation. */
+void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst )
+{
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+}
+
+/*------------------------------------------------------------*/
+/*--- Postponing free()ing                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Holds blocks after freeing. */
+/* FIFO queue of freed-but-not-yet-recycled chunks; 'volume' is the
+   total payload bytes currently queued (sum of sc->size). */
+static ShadowChunk* vg_freed_list_start   = NULL;
+static ShadowChunk* vg_freed_list_end     = NULL;
+static Int          vg_freed_list_volume  = 0;
+
+/* Debug-only helper: number of chunks on the freed list.  Marked
+   unused so it can stay compiled-in without a warning. */
+static __attribute__ ((unused))
+       Int count_freelist ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n++;
+   return n;
+}
+
+/* Debug-only helper: assert that the recorded volume matches the sum
+   of the queued chunks' sizes. */
+static __attribute__ ((unused))
+       void freelist_sanity ( void )
+{
+   ShadowChunk* sc;
+   Int n = 0;
+   /* VG_(printf)("freelist sanity\n"); */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      n += sc->size;
+   vg_assert(n == vg_freed_list_volume);
+}
+
+/* Put a shadow chunk on the freed blocks queue, possibly freeing up
+   some of the oldest blocks in the queue at the same time. */
+static void add_to_freed_queue ( ShadowChunk* sc )
+{
+   ShadowChunk* sc1;
+
+   /* Put it at the end of the freed list */
+   if (vg_freed_list_end == NULL) {
+      /* Queue was empty: sc becomes both head and tail. */
+      vg_assert(vg_freed_list_start == NULL);
+      vg_freed_list_end = vg_freed_list_start = sc;
+      vg_freed_list_volume = sc->size;
+   } else {    
+      vg_assert(vg_freed_list_end->next == NULL);
+      vg_freed_list_end->next = sc;
+      vg_freed_list_end = sc;
+      vg_freed_list_volume += sc->size;
+   }
+   sc->next = NULL;
+
+   /* Release enough of the oldest blocks to bring the free queue
+      volume below vg_clo_freelist_vol. */
+   
+   /* Evict from the head (oldest first) until under the user-settable
+      limit SK_(clo_freelist_vol). */
+   while (vg_freed_list_volume > SK_(clo_freelist_vol)) {
+      /* freelist_sanity(); */
+      vg_assert(vg_freed_list_start != NULL);
+      vg_assert(vg_freed_list_end != NULL);
+
+      sc1 = vg_freed_list_start;
+      vg_freed_list_volume -= sc1->size;
+      /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */
+      vg_assert(vg_freed_list_volume >= 0);
+
+      if (vg_freed_list_start == vg_freed_list_end) {
+         vg_freed_list_start = vg_freed_list_end = NULL;
+      } else {
+         vg_freed_list_start = sc1->next;
+      }
+      sc1->next = NULL; /* just paranoia */
+      VG_(freeShadowChunk) ( sc1 );
+   }
+}
+
+/* Return the first shadow chunk satisfying the predicate p. */
+ShadowChunk* SK_(any_matching_freed_ShadowChunks)
+                        ( Bool (*p) ( ShadowChunk* ))
+{
+   ShadowChunk* sc;
+
+   /* No point looking through freed blocks if we're not keeping
+      them around for a while... */
+   /* NOTE(review): despite the comment above, the scan is done
+      unconditionally; with --freelist-vol=0 the list is simply empty. */
+   for (sc = vg_freed_list_start; sc != NULL; sc = sc->next)
+      if (p(sc))
+         return sc;
+
+   return NULL;
+}
+
+/* Alternative free(): instead of releasing the chunk immediately,
+   record the freeing context and park it on the freed-blocks queue so
+   that use-after-free errors can still be diagnosed for a while. */
+void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst )
+{
+   /* Record where freed */
+   set_where( sc, VG_(get_ExeContext) ( tst ) );
+
+   /* Put it out of harm's way for a while. */
+   add_to_freed_queue ( sc );
+}
+
+/*------------------------------------------------------------*/
+/*--- Low-level address-space scanning, for the leak       ---*/
+/*--- detector.                                            ---*/
+/*------------------------------------------------------------*/
+
+/* Jump target used to escape from a faulting probe of a page
+   during the memory scan below. */
+static 
+jmp_buf memscan_jmpbuf;
+
+/* SIGSEGV/SIGBUS handler: abandon the current page probe by jumping
+   back to the __builtin_setjmp() in VG_(scan_all_valid_memory). */
+static
+void vg_scan_all_valid_memory_sighandler ( Int sigNo )
+{
+   __builtin_longjmp(memscan_jmpbuf, 1);
+}
+
+/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address
+   space and pass the addresses and values of all addressible,
+   defined, aligned words to notify_word.  This is the basis for the
+   leak detector.  Returns the number of calls made to notify_word.  */
+UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
+{
+   /* All volatile, because some gccs seem paranoid about longjmp(). */
+   volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified;
+   volatile Addr pageBase, addr;
+   volatile SecMap* sm;
+   volatile UChar abits;
+   volatile UInt page_first_word;
+
+   vki_ksigaction sigbus_saved;
+   vki_ksigaction sigbus_new;
+   vki_ksigaction sigsegv_saved;
+   vki_ksigaction sigsegv_new;
+   vki_ksigset_t  blockmask_saved;
+   vki_ksigset_t  unblockmask_new;
+
+   /* Temporarily install a new sigsegv and sigbus handler, and make
+      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
+      first two can never be blocked anyway?)  */
+
+   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigbus_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
+   vg_assert(res == 0);
+
+   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
+   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
+   sigsegv_new.ksa_restorer = NULL;
+   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
+   /* The 0+0... forms below all equal 0; presumably written this way
+      only to make each assertion site textually distinct. */
+   vg_assert(res == 0+0);
+
+   res =  VG_(ksigemptyset)( &unblockmask_new );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
+   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
+   vg_assert(res == 0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
+   vg_assert(res == 0+0+0+0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
+   vg_assert(res == 0+0+0+0+0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
+   vg_assert(res == 0+0+0+0+0+0);
+
+   /* The signal handlers are installed.  Actually do the memory scan. */
+   /* Hard-wired to a 32-bit address space with 4KB pages. */
+   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
+   vg_assert(numPages == 1048576);
+   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
+
+   nWordsNotified = 0;
+
+   for (page = 0; page < numPages; page++) {
+      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
+      /* Each primary-map entry covers 64KB, hence the >> 16. */
+      primaryMapNo = pageBase >> 16;
+      sm = primary_map[primaryMapNo];
+      /* A distinguished secondary means the whole 64KB chunk is
+         inaccessible/undefined; skip its pages. */
+      if (IS_DISTINGUISHED_SM(sm)) continue;
+      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
+         /* try this ... */
+         /* Probe the first word; if unmapped, the fault handler
+            longjmps to the else-branch below. */
+         page_first_word = * (volatile UInt*)pageBase;
+         /* we get here if we didn't get a fault */
+         /* Scan the page */
+         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
+            abits  = get_abits4_ALIGNED(addr);
+            vbytes = get_vbytes4_ALIGNED(addr);
+            if (abits == VGM_NIBBLE_VALID 
+                && vbytes == VGM_WORD_VALID) {
+               nWordsNotified++;
+               notify_word ( addr, *(UInt*)addr );
+	    }
+         }
+      } else {
+         /* We get here if reading the first word of the page caused a
+            fault, which in turn caused the signal handler to longjmp.
+            Ignore this page. */
+         if (0)
+         VG_(printf)(
+            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
+            (void*)pageBase 
+         );
+      }
+   }
+
+   /* Restore signal state to whatever it was before. */
+   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
+   vg_assert(res == 0 +0);
+
+   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
+   vg_assert(res == 0 +0 +0);
+
+   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
+   vg_assert(res == 0 +0 +0 +0);
+
+   return nWordsNotified;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
+/*------------------------------------------------------------*/
+
+/* A block is either 
+   -- Proper-ly reached; a pointer to its start has been found
+   -- Interior-ly reached; only an interior pointer to it has been found
+   -- Unreached; so far, no pointers to any part of it have been found. 
+*/
+typedef 
+   enum { Unreached, Interior, Proper } 
+   Reachedness;
+
+/* A block record, used for generating err msgs. */
+/* Loss records aggregate all blocks with the same allocation context
+   and the same reachability, so one message covers them all. */
+typedef
+   struct _LossRecord {
+      struct _LossRecord* next;
+      /* Where these lost blocks were allocated. */
+      ExeContext*  allocated_at;
+      /* Their reachability. */
+      Reachedness  loss_mode;
+      /* Number of blocks and total # bytes involved. */
+      UInt         total_bytes;
+      UInt         num_blocks;
+   }
+   LossRecord;
+
+
+/* Find the i such that ptr points at or inside the block described by
+   shadows[i].  Return -1 if none found.  This assumes that shadows[]
+   has been sorted on the ->data field. */
+
+#ifdef VG_DEBUG_LEAKCHECK
+/* Used to sanity-check the fast binary-search mechanism. */
+/* Reference implementation: linear scan over all shadows.  Only
+   compiled in debug builds, to cross-check find_shadow_for(). */
+static Int find_shadow_for_OLD ( Addr          ptr, 
+                                 ShadowChunk** shadows,
+                                 Int           n_shadows )
+
+{
+   Int  i;
+   Addr a_lo, a_hi;
+   PROF_EVENT(70);
+   for (i = 0; i < n_shadows; i++) {
+      PROF_EVENT(71);
+      a_lo = shadows[i]->data;
+      /* a_hi is the last byte of block i, inclusive. */
+      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
+      if (a_lo <= ptr && ptr <= a_hi)
+         return i;
+   }
+   return -1;
+}
+#endif
+
+
+/* Binary search over shadows[] (sorted by ->data, non-overlapping) for
+   the block containing ptr; returns its index, or -1 if ptr lies in no
+   block.  Cross-checked against the linear version in debug builds. */
+static Int find_shadow_for ( Addr          ptr, 
+                             ShadowChunk** shadows,
+                             Int           n_shadows )
+{
+   Addr a_mid_lo, a_mid_hi;
+   Int lo, mid, hi, retVal;
+   PROF_EVENT(70);
+   /* VG_(printf)("find shadow for %p = ", ptr); */
+   retVal = -1;
+   lo = 0;
+   hi = n_shadows-1;
+   while (True) {
+      PROF_EVENT(71);
+
+      /* invariant: current unsearched space is from lo to hi,
+         inclusive. */
+      if (lo > hi) break; /* not found */
+
+      mid      = (lo + hi) / 2;
+      a_mid_lo = shadows[mid]->data;
+      /* Last byte of the mid block, inclusive. */
+      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
+
+      if (ptr < a_mid_lo) {
+         hi = mid-1;
+         continue;
+      } 
+      if (ptr > a_mid_hi) {
+         lo = mid+1;
+         continue;
+      }
+      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
+      retVal = mid;
+      break;
+   }
+
+#  ifdef VG_DEBUG_LEAKCHECK
+   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
+#  endif
+   /* VG_(printf)("%d\n", retVal); */
+   return retVal;
+}
+
+
+
+/* Sort shadows[0..n_shadows-1] in increasing ->data order, using
+   shellsort with the h' = 3h+1 gap sequence (1, 4, 13, 40, ...).
+   Required by find_shadow_for()'s binary search. */
+static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
+{
+   /* Gap sequence; incs[13] = 2391484 bounds usable array sizes. */
+   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                      9841, 29524, 88573, 265720,
+                      797161, 2391484 };
+   Int          lo = 0;
+   Int          hi = n_shadows-1;
+   Int          i, j, h, bigN, hp;
+   ShadowChunk* v;
+
+   PROF_EVENT(72);
+   bigN = hi - lo + 1; if (bigN < 2) return;
+   /* Find the largest gap smaller than the array size. */
+   hp = 0; while (incs[hp] < bigN) hp++; hp--;
+
+   for (; hp >= 0; hp--) {
+      PROF_EVENT(73);
+      h = incs[hp];
+      i = lo + h;
+      /* Gapped insertion sort with gap h. */
+      while (1) {
+         PROF_EVENT(74);
+         if (i > hi) break;
+         v = shadows[i];
+         j = i;
+         while (shadows[j-h]->data > v->data) {
+            PROF_EVENT(75);
+            shadows[j] = shadows[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         shadows[j] = v;
+         i++;
+      }
+   }
+}
+
+/* Globals, for the callback used by SK_(detect_memory_leaks). */
+
+/* The scan callback has a fixed (Addr, UInt) signature, so the state
+   it needs is passed through these file-scope variables. */
+static ShadowChunk** vglc_shadows;          /* sorted array of live blocks */
+static Int           vglc_n_shadows;        /* number of entries above */
+static Reachedness*  vglc_reachedness;      /* per-block reachability, parallel array */
+static Addr          vglc_min_mallocd_addr; /* lowest byte of any block */
+static Addr          vglc_max_mallocd_addr; /* highest byte of any block */
+
+/* Callback invoked by VG_(scan_all_valid_memory) for every valid,
+   defined, aligned word: treat word_at_a as a candidate pointer and
+   upgrade the pointed-to block's reachability accordingly. */
+static 
+void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
+{
+   Int  sh_no;
+   Addr ptr;
+
+   /* Rule out some known causes of bogus pointers.  Mostly these do
+      not cause much trouble because only a few false pointers can
+      ever lurk in these places.  This mainly stops it reporting that
+      blocks are still reachable in stupid test programs like this
+
+         int main (void) { char* a = malloc(100); return 0; }
+
+      which people seem inordinately fond of writing, for some reason.  
+
+      Note that this is a complete kludge.  It would be better to
+      ignore any addresses corresponding to valgrind.so's .bss and
+      .data segments, but I cannot think of a reliable way to identify
+      where the .bss segment has been put.  If you can, drop me a
+      line.  
+   */
+   if (VG_(within_stack)(a))                return;
+   if (VG_(within_m_state_static)(a))       return;
+   /* These two globals themselves hold block addresses; ignore them. */
+   if (a == (Addr)(&vglc_min_mallocd_addr)) return;
+   if (a == (Addr)(&vglc_max_mallocd_addr)) return;
+
+   /* OK, let's get on and do something Useful for a change. */
+
+   ptr = (Addr)word_at_a;
+   /* Cheap range filter before the binary search. */
+   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
+      /* Might be legitimate; we'll have to investigate further. */
+      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
+      if (sh_no != -1) {
+         /* Found a block at/into which ptr points. */
+         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
+         vg_assert(ptr < vglc_shadows[sh_no]->data 
+                         + vglc_shadows[sh_no]->size);
+         /* Decide whether Proper-ly or Interior-ly reached. */
+         if (ptr == vglc_shadows[sh_no]->data) {
+            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
+            /* Pointer to the start: Proper always wins. */
+            vglc_reachedness[sh_no] = Proper;
+         } else {
+            /* Interior pointer: only upgrades Unreached, never
+               downgrades an existing Proper. */
+            if (vglc_reachedness[sh_no] == Unreached)
+               vglc_reachedness[sh_no] = Interior;
+         }
+      }
+   }
+}
+
+
+/* Top-level leak check: sort the live blocks, scan all valid memory
+   for pointers into them, classify each block (Unreached / Interior /
+   Proper), then aggregate same-context blocks into loss records and
+   print them in increasing size order. */
+void SK_(detect_memory_leaks) ( void )
+{
+   Int    i;
+   Int    blocks_leaked, bytes_leaked;
+   Int    blocks_dubious, bytes_dubious;
+   Int    blocks_reachable, bytes_reachable;
+   Int    n_lossrecords;
+   UInt   bytes_notified;
+   
+   LossRecord*  errlist;
+   LossRecord*  p;
+
+   PROF_EVENT(76);
+
+   /* VG_(get_malloc_shadows) allocates storage for shadows */
+   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
+   if (vglc_n_shadows == 0) {
+      vg_assert(vglc_shadows == NULL);
+      VG_(message)(Vg_UserMsg, 
+                   "No malloc'd blocks -- no leaks are possible.\n");
+      return;
+   }
+
+   VG_(message)(Vg_UserMsg, 
+                "searching for pointers to %d not-freed blocks.", 
+                vglc_n_shadows );
+   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
+
+   /* Sanity check; assert that the blocks are now in order and that
+      they don't overlap. */
+   for (i = 0; i < vglc_n_shadows-1; i++) {
+      vg_assert( ((Addr)vglc_shadows[i]->data)
+                 < ((Addr)vglc_shadows[i+1]->data) );
+      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
+                 < ((Addr)vglc_shadows[i+1]->data) );
+   }
+
+   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
+   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
+                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
+
+   /* All blocks start Unreached; the scan callback upgrades them. */
+   vglc_reachedness 
+      = VG_(malloc)( vglc_n_shadows * sizeof(Reachedness) );
+   for (i = 0; i < vglc_n_shadows; i++)
+      vglc_reachedness[i] = Unreached;
+
+   /* Do the scan of memory. */
+   bytes_notified
+       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
+         * VKI_BYTES_PER_WORD;
+
+   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
+
+   /* Tot up the per-category block/byte counts. */
+   blocks_leaked    = bytes_leaked    = 0;
+   blocks_dubious   = bytes_dubious   = 0;
+   blocks_reachable = bytes_reachable = 0;
+
+   for (i = 0; i < vglc_n_shadows; i++) {
+      if (vglc_reachedness[i] == Unreached) {
+         blocks_leaked++;
+         bytes_leaked += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Interior) {
+         blocks_dubious++;
+         bytes_dubious += vglc_shadows[i]->size;
+      }
+      else if (vglc_reachedness[i] == Proper) {
+         blocks_reachable++;
+         bytes_reachable += vglc_shadows[i]->size;
+      }
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+
+
+   /* Common up the lost blocks so we can print sensible error
+      messages. */
+
+   n_lossrecords = 0;
+   errlist       = NULL;
+   for (i = 0; i < vglc_n_shadows; i++) {
+     
+      /* 'where' stored in 'skin_extra' field */
+      ExeContext* where = get_where ( vglc_shadows[i] );
+
+      /* Look for an existing record with the same reachability and
+         (up to clo_leak_resolution) the same allocation context. */
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->loss_mode == vglc_reachedness[i]
+             && VG_(eq_ExeContext) ( SK_(clo_leak_resolution),
+                                     p->allocated_at, 
+                                     where) ) {
+            break;
+	 }
+      }
+      if (p != NULL) {
+         p->num_blocks  ++;
+         p->total_bytes += vglc_shadows[i]->size;
+      } else {
+         n_lossrecords ++;
+         p = VG_(malloc)(sizeof(LossRecord));
+         p->loss_mode    = vglc_reachedness[i];
+         p->allocated_at = where;
+         p->total_bytes  = vglc_shadows[i]->size;
+         p->num_blocks   = 1;
+         p->next         = errlist;
+         errlist         = p;
+      }
+   }
+   
+   /* Print the records smallest-first; num_blocks is zeroed to mark a
+      record as already printed (or suppressed). */
+   for (i = 0; i < n_lossrecords; i++) {
+      LossRecord* p_min = NULL;
+      UInt        n_min = 0xFFFFFFFF;
+      for (p = errlist; p != NULL; p = p->next) {
+         if (p->num_blocks > 0 && p->total_bytes < n_min) {
+            n_min = p->total_bytes;
+            p_min = p;
+         }
+      }
+      vg_assert(p_min != NULL);
+
+      /* Reachable blocks are only shown with --show-reachable=yes. */
+      if ( (!SK_(clo_show_reachable)) && p_min->loss_mode == Proper) {
+         p_min->num_blocks = 0;
+         continue;
+      }
+
+      VG_(message)(Vg_UserMsg, "");
+      VG_(message)(
+         Vg_UserMsg,
+         "%d bytes in %d blocks are %s in loss record %d of %d",
+         p_min->total_bytes, p_min->num_blocks,
+         p_min->loss_mode==Unreached ? "definitely lost" :
+            (p_min->loss_mode==Interior ? "possibly lost"
+                                        : "still reachable"),
+         i+1, n_lossrecords
+      );
+      VG_(pp_ExeContext)(p_min->allocated_at);
+      p_min->num_blocks = 0;
+   }
+
+   VG_(message)(Vg_UserMsg, "");
+   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
+   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
+                            bytes_leaked, blocks_leaked );
+   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
+                            bytes_dubious, blocks_dubious );
+   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
+                            bytes_reachable, blocks_reachable );
+   if (!SK_(clo_show_reachable)) {
+      VG_(message)(Vg_UserMsg, 
+         "Reachable blocks (those to which a pointer was found) are not shown.");
+      VG_(message)(Vg_UserMsg, 
+         "To see them, rerun with: --show-reachable=yes");
+   }
+   VG_(message)(Vg_UserMsg, "");
+
+   /* NOTE(review): the LossRecord list itself is not freed here --
+      leaked on every leak check; confirm whether intentional. */
+   VG_(free) ( vglc_shadows );
+   VG_(free) ( vglc_reachedness );
+}
+
+
+/* ---------------------------------------------------------------------
+   Sanity check machinery (permanently engaged).
+   ------------------------------------------------------------------ */
+
+/* Check that nobody has spuriously claimed that the first or last 16
+   pages (64 KB) of address space have become accessible.  Failure of
+   the following do not per se indicate an internal consistency
+   problem, but they are so likely to that we really want to know
+   about it if so. */
+
+/* Fast check, run frequently: both the lowest and the highest 64KB
+   chunks must still map to the distinguished secondary. */
+Bool SK_(cheap_sanity_check) ( void )
+{
+   if (IS_DISTINGUISHED_SM(primary_map[0]) && 
+       IS_DISTINGUISHED_SM(primary_map[65535]))
+      return True;
+   else
+      return False;
+}
+
+/* Thorough check: the distinguished secondary map must be untouched
+   (all A bits and V bytes invalid), and the upper 3GB of the primary
+   map must still point at it. */
+Bool SK_(expensive_sanity_check) ( void )
+{
+   Int i;
+
+   /* Make sure nobody changed the distinguished secondary. */
+   /* 8192 bytes of A bits cover 65536 addresses (1 bit each). */
+   for (i = 0; i < 8192; i++)
+      if (distinguished_secondary_map.abits[i] != VGM_BYTE_INVALID)
+         return False;
+
+   for (i = 0; i < 65536; i++)
+      if (distinguished_secondary_map.vbyte[i] != VGM_BYTE_INVALID)
+         return False;
+
+   /* Make sure that the upper 3/4 of the primary map hasn't
+      been messed with. */
+   for (i = 65536; i < 262144; i++)
+      if (primary_map[i] != & distinguished_secondary_map)
+         return False;
+
+   return True;
+}
+      
+/* ---------------------------------------------------------------------
+   Debugging machinery (turn on to debug).  Something of a mess.
+   ------------------------------------------------------------------ */
+
+#if 0
+/* Print the value tags on the 8 integer registers & flag reg. */
+
+/* Render x as 32 '0'/'1' chars with a space every 8 bits.
+   str must point to a space of at least 36 bytes. */
+static void uint_to_bits ( UInt x, Char* str )
+{
+   Int i;
+   Int w = 0;
+   /* str must point to a space of at least 36 bytes. */
+   for (i = 31; i >= 0; i--) {
+      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
+      if (i == 24 || i == 16 || i == 8)
+         str[w++] = ' ';
+   }
+   str[w++] = 0;
+   vg_assert(w == 36);
+}
+
+/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
+   state table. */
+
+static void vg_show_reg_tags ( void )
+{
+   Char buf1[36];
+   Char buf2[36];
+   UInt z_eax, z_ebx, z_ecx, z_edx, 
+        z_esi, z_edi, z_ebp, z_esp, z_eflags;
+
+   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
+   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
+   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
+   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
+   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
+   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
+   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
+   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
+   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   
+   uint_to_bits(z_eflags, buf1);
+   /* FIXME: "%" below is missing its conversion char -- should be
+      "efl %s\n".  Harmless while this block is #if 0'd out. */
+   VG_(message)(Vg_DebugMsg, "efl %\n", buf1);
+
+   uint_to_bits(z_eax, buf1);
+   uint_to_bits(z_ebx, buf2);
+   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
+
+   uint_to_bits(z_ecx, buf1);
+   uint_to_bits(z_edx, buf2);
+   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
+
+   uint_to_bits(z_esi, buf1);
+   uint_to_bits(z_edi, buf2);
+   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
+
+   uint_to_bits(z_ebp, buf1);
+   uint_to_bits(z_esp, buf2);
+   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
+}
+
+
+/* For debugging only.  Scan the address space and touch all allegedly
+   addressible words.  Useful for establishing where Valgrind's idea of
+   addressibility has diverged from what the kernel believes. */
+
+static 
+void zzzmemscan_notify_word ( Addr a, UInt w )
+{
+}
+
+void zzzmemscan ( void )
+{
+   Int n_notifies
+      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
+   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
+}
+#endif
+
+
+
+
+#if 0
+static Int zzz = 0;
+
+/* Debug-only: dump register tags and the translation of the next BB. */
+void show_bb ( Addr eip_next )
+{
+   VG_(printf)("[%4d] ", zzz);
+   /* FIXME: mismatched parenthesis -- '&VG_(m_shadow' is never closed,
+      and vg_show_reg_tags above takes no arguments.  Won't compile if
+      this #if 0 block is ever enabled. */
+   vg_show_reg_tags( &VG_(m_shadow );
+   VG_(translate) ( eip_next, NULL, NULL, NULL );
+}
+#endif /* 0 */
+
+/*------------------------------------------------------------*/
+/*--- Syscall wrappers                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Run the cheap sanity check before each syscall; the result is
+   smuggled through to SK_(post_syscall) via the opaque void* return. */
+void* SK_(pre_syscall)  ( ThreadId tid, UInt syscallno, Bool isBlocking )
+{
+   Int sane = SK_(cheap_sanity_check)();
+   return (void*)sane;
+}
+
+/* After the syscall: if the world was sane before the call but is not
+   afterwards, the syscall wrapper presumably mis-described its memory
+   effects -- abort loudly so it gets fixed. */
+void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                           void* pre_result, Int res, Bool isBlocking )
+{
+   /* pre_result is the Int returned by SK_(pre_syscall), cast back. */
+   Int  sane_before_call = (Int)pre_result;
+   Bool sane_after_call  = SK_(cheap_sanity_check)();
+
+   if ((Int)sane_before_call && (!sane_after_call)) {
+      VG_(message)(Vg_DebugMsg, "post-syscall: ");
+      VG_(message)(Vg_DebugMsg,
+                   "probable sanity check failure for syscall number %d\n",
+                   syscallno );
+      VG_(panic)("aborting due to the above ... bye!");
+   }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+
+/* Shadow values the core should install for registers that have just
+   been written: fully defined. */
+void SK_(written_shadow_regs_values)( UInt* gen_reg_value, UInt* eflags_value )
+{
+   *gen_reg_value = VGM_WORD_VALID;
+   *eflags_value  = VGM_EFLAGS_VALID;
+}
+
+/* Parse one skin-specific command-line option.  Returns True if 'arg'
+   was recognised (and the corresponding SK_(clo_*) flag updated),
+   False to let the core report it as unknown. */
+Bool SK_(process_cmd_line_option)(Char* arg)
+{
+#  define STREQ(s1,s2)     (0==VG_(strcmp_ws)((s1),(s2)))
+#  define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn)))
+
+   if      (STREQ(arg, "--partial-loads-ok=yes"))
+      SK_(clo_partial_loads_ok) = True;
+   else if (STREQ(arg, "--partial-loads-ok=no"))
+      SK_(clo_partial_loads_ok) = False;
+
+   /* 15 == strlen("--freelist-vol="); the value starts at arg[15]. */
+   else if (STREQN(15, arg, "--freelist-vol=")) {
+      SK_(clo_freelist_vol) = (Int)VG_(atoll)(&arg[15]);
+      /* Clamp negative volumes to zero. */
+      if (SK_(clo_freelist_vol) < 0) SK_(clo_freelist_vol) = 0;
+   }
+
+   else if (STREQ(arg, "--leak-check=yes"))
+      SK_(clo_leak_check) = True;
+   else if (STREQ(arg, "--leak-check=no"))
+      SK_(clo_leak_check) = False;
+
+   else if (STREQ(arg, "--leak-resolution=low"))
+      SK_(clo_leak_resolution) = Vg_LowRes;
+   else if (STREQ(arg, "--leak-resolution=med"))
+      SK_(clo_leak_resolution) = Vg_MedRes;
+   else if (STREQ(arg, "--leak-resolution=high"))
+      SK_(clo_leak_resolution) = Vg_HighRes;
+   
+   else if (STREQ(arg, "--show-reachable=yes"))
+      SK_(clo_show_reachable) = True;
+   else if (STREQ(arg, "--show-reachable=no"))
+      SK_(clo_show_reachable) = False;
+
+   else if (STREQ(arg, "--workaround-gcc296-bugs=yes"))
+      SK_(clo_workaround_gcc296_bugs) = True;
+   else if (STREQ(arg, "--workaround-gcc296-bugs=no"))
+      SK_(clo_workaround_gcc296_bugs) = False;
+
+   else if (STREQ(arg, "--check-addrVs=yes"))
+      SK_(clo_check_addrVs) = True;
+   else if (STREQ(arg, "--check-addrVs=no"))
+      SK_(clo_check_addrVs) = False;
+
+   else if (STREQ(arg, "--cleanup=yes"))
+      SK_(clo_cleanup) = True;
+   else if (STREQ(arg, "--cleanup=no"))
+      SK_(clo_cleanup) = False;
+
+   else
+      return False;
+
+   return True;
+
+#undef STREQ
+#undef STREQN
+}
+
+/* Usage text for the options handled above, printed by the core's
+   --help output.  Defaults shown in [brackets]. */
+Char* SK_(usage)(void)
+{  
+   return  
+"    --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
+"    --freelist-vol=<number>   volume of freed blocks queue [1000000]\n"
+"    --leak-check=no|yes       search for memory leaks at exit? [no]\n"
+"    --leak-resolution=low|med|high\n"
+"                              amount of bt merging in leak check [low]\n"
+"    --show-reachable=no|yes   show reachable blocks in leak check? [no]\n"
+"    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
+"    --check-addrVs=no|yes     experimental lighterweight checking? [yes]\n"
+"                              yes == Valgrind's original behaviour\n"
+"\n"
+"    --cleanup=no|yes          improve after instrumentation? [yes]\n";
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Setup                                                ---*/
+/*------------------------------------------------------------*/
+/* NOTE(review): duplicate "Setup" banner -- same heading appears
+   before SK_(written_shadow_regs_values) above. */
+
+/* Skin initialisation, called before command-line processing: declare
+   what core services this skin needs, register the instrumentation
+   helpers, and hook the memory-event trackers. */
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track)
+{
+   /* NOTE(review): name is "valgrind" rather than e.g. "memcheck" --
+      confirm this is intended for this skin. */
+   needs->name                    = "valgrind";
+   needs->description             = "a memory error detector";
+
+   needs->core_errors             = True;
+   needs->skin_errors             = True;
+   needs->run_libc_freeres        = True;
+
+   /* One skin-private word per ShadowChunk: the alloc/free ExeContext
+      managed by set_where()/get_where() above. */
+   needs->sizeof_shadow_block     = 1;
+
+   needs->basic_block_discards    = False;
+   needs->shadow_regs             = True;
+   needs->command_line_options    = True;
+   needs->client_requests         = True;
+   needs->extended_UCode          = True;
+   needs->syscall_wrapper         = True;
+   needs->alternative_free        = True;
+   needs->sanity_checks           = True;
+
+   /* Compact helpers: the frequently-called 1- and 4-byte paths. */
+   VG_(register_compact_helper)((Addr) & SK_(helper_value_check4_fail));
+   VG_(register_compact_helper)((Addr) & SK_(helper_value_check0_fail));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_STOREV4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_STOREV1));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_LOADV4));
+   VG_(register_compact_helper)((Addr) & SK_(helperc_LOADV1));
+
+   /* These two made non-compact because 2-byte transactions are rare. */
+   VG_(register_noncompact_helper)((Addr) & SK_(helperc_STOREV2));
+   VG_(register_noncompact_helper)((Addr) & SK_(helperc_LOADV2));
+   VG_(register_noncompact_helper)((Addr) & SK_(fpu_write_check));
+   VG_(register_noncompact_helper)((Addr) & SK_(fpu_read_check));
+   VG_(register_noncompact_helper)((Addr) & SK_(helper_value_check2_fail));
+   VG_(register_noncompact_helper)((Addr) & SK_(helper_value_check1_fail));
+
+   /* Events to track */
+   track->new_mem_startup       = & memcheck_new_mem_startup;
+   track->new_mem_heap          = & memcheck_new_mem_heap;
+   track->new_mem_stack         = & SK_(make_writable);
+   track->new_mem_stack_aligned = & make_writable_aligned;
+   track->new_mem_stack_signal  = & SK_(make_writable);
+   track->new_mem_brk           = & SK_(make_writable);
+   track->new_mem_mmap          = & memcheck_set_perms;
+   
+   track->copy_mem_heap         = & copy_address_range_state;
+   track->copy_mem_remap        = & copy_address_range_state;
+   track->change_mem_mprotect   = & memcheck_set_perms;
+      
+   track->ban_mem_heap          = & SK_(make_noaccess);
+   track->ban_mem_stack         = & SK_(make_noaccess);
+
+   track->die_mem_heap          = & SK_(make_noaccess);
+   track->die_mem_stack         = & SK_(make_noaccess);
+   track->die_mem_stack_aligned = & make_noaccess_aligned; 
+   track->die_mem_stack_signal  = & SK_(make_noaccess); 
+   track->die_mem_brk           = & SK_(make_noaccess);
+   track->die_mem_munmap        = & SK_(make_noaccess); 
+
+   track->bad_free              = & SK_(record_free_error);
+   track->mismatched_free       = & SK_(record_freemismatch_error);
+
+   track->pre_mem_read          = & check_is_readable;
+   track->pre_mem_read_asciiz   = & check_is_readable_asciiz;
+   track->pre_mem_write         = & check_is_writable;
+   track->post_mem_write        = & SK_(make_readable);
+
+   init_shadow_memory();
+
+   init_prof_mem();
+
+   VGP_(register_profile_event) ( VgpSetMem,   "set-mem-perms" );
+   VGP_(register_profile_event) ( VgpCheckMem, "check-mem-perms" );
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                            vg_memcheck.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_memcheck.h b/vg_memcheck.h
new file mode 100644
index 0000000..b126ffb
--- /dev/null
+++ b/vg_memcheck.h
@@ -0,0 +1,197 @@
+
+/*
+   ----------------------------------------------------------------
+
+   Notice that the following BSD-style license applies to this one
+   file (vg_memcheck.h) only.  The entire rest of Valgrind is licensed
+   under the terms of the GNU General Public License, version 2.  See
+   the COPYING file in the source distribution for details.
+
+   ----------------------------------------------------------------
+
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward.  All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+   2. The origin of this software must not be misrepresented; you must 
+      not claim that you wrote the original software.  If you use this 
+      software in a product, an acknowledgment in the product 
+      documentation would be appreciated but is not required.
+
+   3. Altered source versions must be plainly marked as such, and must
+      not be misrepresented as being the original software.
+
+   4. The name of the author may not be used to endorse or promote 
+      products derived from this software without specific prior written 
+      permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   ----------------------------------------------------------------
+
+   Notice that the above BSD-style license applies to this one file
+   (vg_memcheck.h) only.  The entire rest of Valgrind is licensed under
+   the terms of the GNU General Public License, version 2.  See the
+   COPYING file in the source distribution for details.
+
+   ---------------------------------------------------------------- 
+*/
+
+
+#ifndef __VG_MEMCHECK_H
+#define __VG_MEMCHECK_H
+
+
+/* This file is for inclusion into client (your!) code.
+
+   You can use these macros to manipulate and query memory permissions
+   inside your own programs.
+
+   See comment near the top of valgrind.h on how to use them.
+*/
+
+#include "valgrind.h"
+
+typedef
+   enum { 
+      VG_USERREQ__MAKE_NOACCESS = VG_USERREQ__FINAL_DUMMY_CLIENT_REQUEST + 1, 
+      VG_USERREQ__MAKE_WRITABLE,
+      VG_USERREQ__MAKE_READABLE,
+      VG_USERREQ__DISCARD,
+      VG_USERREQ__CHECK_WRITABLE,
+      VG_USERREQ__CHECK_READABLE,
+      VG_USERREQ__MAKE_NOACCESS_STACK,
+      VG_USERREQ__DO_LEAK_CHECK, /* untested */
+   } Vg_MemCheckClientRequest;
+
+
+
+/* Client-code macros to manipulate the state of memory. */
+
+/* Mark memory at _qzz_addr as unaddressable and undefined for
+   _qzz_len bytes.  Returns an int handle pertaining to the block
+   descriptions Valgrind will use in subsequent error messages. */
+#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_NOACCESS,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   }) 
+      
+/* Similarly, mark memory at _qzz_addr as addressable but undefined
+   for _qzz_len bytes. */
+#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_WRITABLE,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   })
+
+/* Similarly, mark memory at _qzz_addr as addressable and defined
+   for _qzz_len bytes. */
+#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len)               \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__MAKE_READABLE,           \
+                            _qzz_addr, _qzz_len, 0, 0);          \
+    _qzz_res;                                                    \
+   })
+
+/* Discard a block-description-handle obtained from the above three
+   macros.  After this, Valgrind will no longer be able to relate
+   addressing errors to the user-defined block associated with the
+   handle.  The permissions settings associated with the handle remain
+   in place.  Returns 1 for an invalid handle, 0 for a valid
+   handle. */
+#define VALGRIND_DISCARD(_qzz_blkindex)                          \
+   ({unsigned int _qzz_res;                                      \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */,    \
+                            VG_USERREQ__DISCARD,                 \
+                            0, _qzz_blkindex, 0, 0);             \
+    _qzz_res;                                                    \
+   })
+
+
+/* Client-code macros to check the state of memory. */
+
+/* Check that memory at _qzz_addr is addressable for _qzz_len bytes.
+   If suitable addressability is not established, Valgrind prints an
+   error message and returns the address of the first offending byte.
+   Otherwise it returns zero. */
+#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len)                \
+   ({unsigned int _qzz_res;                                        \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__CHECK_WRITABLE,            \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+    _qzz_res;                                                      \
+   })
+
+/* Check that memory at _qzz_addr is addressable and defined for
+   _qzz_len bytes.  If suitable addressability and definedness are not
+   established, Valgrind prints an error message and returns the
+   address of the first offending byte.  Otherwise it returns zero. */
+#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len)                \
+   ({unsigned int _qzz_res;                                        \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__CHECK_READABLE,            \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+    _qzz_res;                                                      \
+   })
+
+/* Use this macro to force the definedness and addressability of a
+   value to be checked.  If suitable addressability and definedness
+   are not established, Valgrind prints an error message and returns
+   the address of the first offending byte.  Otherwise it returns
+   zero. */
+#define VALGRIND_CHECK_DEFINED(__lvalue)                           \
+   (void)                                                          \
+   VALGRIND_CHECK_READABLE(                                        \
+      (volatile unsigned char *)&(__lvalue),                       \
+                      (unsigned int)(sizeof (__lvalue)))
+
+/* Mark memory, intended to be on the client's stack, at _qzz_addr as
+   unaddressable and undefined for _qzz_len bytes.  Does not return a
+   value.  The record associated with this setting will be
+   automatically removed by Valgrind when the containing routine
+   exits. */
+#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len)           \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__MAKE_NOACCESS_STACK,       \
+                            _qzz_addr, _qzz_len, 0, 0);            \
+   }
+
+
+
+/* Do a memory leak check mid-execution.
+   Currently implemented but untested.
+*/
+#define VALGRIND_DO_LEAK_CHECK                                     \
+   {unsigned int _qzz_res;                                         \
+    VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0,                           \
+                            VG_USERREQ__DO_LEAK_CHECK,             \
+                            0, 0, 0, 0);                           \
+   }
+
+
+#endif
diff --git a/vg_memcheck_clientreqs.c b/vg_memcheck_clientreqs.c
new file mode 100644
index 0000000..b5284bd
--- /dev/null
+++ b/vg_memcheck_clientreqs.c
@@ -0,0 +1,367 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: for when the client advises       ---*/
+/*--- Valgrind about memory permissions.                           ---*/
+/*---                                     vg_memcheck_clientreqs.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+#include "vg_memcheck.h"  /* for VG_USERREQ__* */
+
+
+/*------------------------------------------------------------*/
+/*--- General client block management.                     ---*/
+/*------------------------------------------------------------*/
+
+/* This is managed as an expanding array of client block descriptors.
+   Indices of live descriptors are issued to the client, so it can ask
+   to free them later.  Therefore we cannot slide live entries down
+   over dead ones.  Instead we must use free/inuse flags and scan for
+   an empty slot at allocation time.  This in turn means allocation is
+   relatively expensive, so we hope this does not happen too often. 
+*/
+
+typedef
+   enum { CG_NotInUse, CG_NoAccess, CG_Writable, CG_Readable }
+   CGenBlockKind;
+
+typedef
+   struct {
+      Addr          start;
+      UInt          size;
+      ExeContext*   where;
+      CGenBlockKind kind;
+   } 
+   CGenBlock;
+
+/* This subsystem is self-initialising. */
+static UInt       vg_cgb_size = 0;
+static UInt       vg_cgb_used = 0;
+static CGenBlock* vg_cgbs     = NULL;
+
+/* Stats for this subsystem. */
+static UInt vg_cgb_used_MAX = 0;   /* Max in use. */
+static UInt vg_cgb_allocs   = 0;   /* Number of allocs. */
+static UInt vg_cgb_discards = 0;   /* Number of discards. */
+static UInt vg_cgb_search   = 0;   /* Number of searches. */
+
+
+static
+Int vg_alloc_client_block ( void )
+{
+   Int        i, sz_new;
+   CGenBlock* cgbs_new;
+
+   vg_cgb_allocs++;
+
+   for (i = 0; i < vg_cgb_used; i++) {
+      vg_cgb_search++;
+      if (vg_cgbs[i].kind == CG_NotInUse)
+         return i;
+   }
+
+   /* Not found.  Try to allocate one at the end. */
+   if (vg_cgb_used < vg_cgb_size) {
+      vg_cgb_used++;
+      return vg_cgb_used-1;
+   }
+
+   /* Ok, we have to allocate a new one. */
+   vg_assert(vg_cgb_used == vg_cgb_size);
+   sz_new = (vg_cgbs == NULL) ? 10 : (2 * vg_cgb_size);
+
+   cgbs_new = VG_(malloc)( sz_new * sizeof(CGenBlock) );
+   for (i = 0; i < vg_cgb_used; i++) 
+      cgbs_new[i] = vg_cgbs[i];
+
+   if (vg_cgbs != NULL)
+      VG_(free)( vg_cgbs );
+   vg_cgbs = cgbs_new;
+
+   vg_cgb_size = sz_new;
+   vg_cgb_used++;
+   if (vg_cgb_used > vg_cgb_used_MAX)
+      vg_cgb_used_MAX = vg_cgb_used;
+   return vg_cgb_used-1;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Stack block management.                              ---*/
+/*------------------------------------------------------------*/
+
+/* This is managed as an expanding array of CStackBlocks.  They are
+   packed up against the left-hand end of the array, with no holes.
+   They are kept sorted by the start field, with the [0] having the
+   highest value.  This means it's pretty cheap to put new blocks at
+   the end, corresponding to stack pushes, since the additions put
+   blocks on in what is presumably fairly close to strictly descending
+   order.  If this assumption doesn't hold the performance
+   consequences will be horrible.
+
+   When the client's %ESP jumps back upwards as the result of a RET
+   insn, we shrink the array backwards from the end, in a
+   guaranteed-cheap linear scan.  
+*/
+
+typedef
+   struct {
+      Addr        start;
+      UInt        size;
+      ExeContext* where;
+   } 
+   CStackBlock;
+
+/* This subsystem is self-initialising. */
+static UInt         vg_csb_size = 0;
+static UInt         vg_csb_used = 0;
+static CStackBlock* vg_csbs     = NULL;
+
+/* Stats for this subsystem. */
+static UInt vg_csb_used_MAX = 0;   /* Max in use. */
+static UInt vg_csb_allocs   = 0;   /* Number of allocs. */
+static UInt vg_csb_discards = 0;   /* Number of discards. */
+static UInt vg_csb_swaps    = 0;   /* Number of swaps. */
+
+static
+void vg_add_client_stack_block ( ThreadState* tst, Addr aa, UInt sz )
+{
+   UInt i, sz_new;
+   CStackBlock* csbs_new;
+   vg_csb_allocs++;
+
+   /* Ensure there is space for a new block. */
+
+   if (vg_csb_used >= vg_csb_size) {
+
+      /* No; we have to expand the array. */
+      vg_assert(vg_csb_used == vg_csb_size);
+
+      sz_new = (vg_csbs == NULL) ? 10 : (2 * vg_csb_size);
+
+      csbs_new = VG_(malloc)( sz_new * sizeof(CStackBlock) );
+      for (i = 0; i < vg_csb_used; i++) 
+        csbs_new[i] = vg_csbs[i];
+
+      if (vg_csbs != NULL)
+         VG_(free)( vg_csbs );
+      vg_csbs = csbs_new;
+
+      vg_csb_size = sz_new;
+   }
+
+   /* Ok, we can use [vg_csb_used]. */
+   vg_csbs[vg_csb_used].start = aa;
+   vg_csbs[vg_csb_used].size  = sz;
+   /* Actually running a thread at this point. */
+   vg_csbs[vg_csb_used].where = VG_(get_ExeContext) ( tst );
+   vg_csb_used++;
+
+   if (vg_csb_used > vg_csb_used_MAX)
+      vg_csb_used_MAX = vg_csb_used;
+
+   vg_assert(vg_csb_used <= vg_csb_size);
+
+   /* VG_(printf)("acsb  %p %d\n", aa, sz); */
+   SK_(make_noaccess) ( aa, sz );
+
+   /* And make sure that they are in descending order of address.  NOTE(review): the loop below starts with i == vg_csb_used, so its first test reads vg_csbs[vg_csb_used] (one past the element just added, uninitialized), and the body never decrements i, so at most one swap happens; it looks like it should start at vg_csb_used-1 and do i-- each iteration -- verify (the assertion loop below would catch an unsorted result). */
+   i = vg_csb_used;
+   while (i > 0 && vg_csbs[i-1].start < vg_csbs[i].start) {
+      CStackBlock tmp = vg_csbs[i-1];
+      vg_csbs[i-1] = vg_csbs[i];
+      vg_csbs[i] = tmp;
+      vg_csb_swaps++;
+   }
+
+#  if 1
+   for (i = 1; i < vg_csb_used; i++)
+      vg_assert(vg_csbs[i-1].start >= vg_csbs[i].start);
+#  endif
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Externally visible functions.                        ---*/
+/*------------------------------------------------------------*/
+
+void SK_(show_client_block_stats) ( void )
+{
+   VG_(message)(Vg_DebugMsg, 
+      "general CBs: %d allocs, %d discards, %d maxinuse, %d search",
+      vg_cgb_allocs, vg_cgb_discards, vg_cgb_used_MAX, vg_cgb_search 
+   );
+   VG_(message)(Vg_DebugMsg, 
+      "  stack CBs: %d allocs, %d discards, %d maxinuse, %d swap",
+      vg_csb_allocs, vg_csb_discards, vg_csb_used_MAX, vg_csb_swaps
+   );
+}
+
+Bool SK_(client_perm_maybe_describe)( Addr a, AddrInfo* ai )
+{
+   Int i;
+   /* VG_(printf)("try to identify %d\n", a); */
+
+   /* First see if it's a stack block.  We do two passes, one exact
+      and one with a bit of slop, so as to try and get the most
+      accurate fix. */
+   for (i = 0; i < vg_csb_used; i++) {
+      if (vg_csbs[i].start <= a
+          && a < vg_csbs[i].start + vg_csbs[i].size) {
+         ai->akind = UserS;
+         ai->blksize = vg_csbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
+         ai->lastchange = vg_csbs[i].where;
+         return True;
+      }
+   }
+
+   /* No exact match on the stack.  Re-do the stack scan with a bit of
+      slop. */
+   for (i = 0; i < vg_csb_used; i++) {
+      if (vg_csbs[i].start - 8 <= a
+          && a < vg_csbs[i].start + vg_csbs[i].size + 8) {
+         ai->akind = UserS;
+         ai->blksize = vg_csbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_csbs[i].start);
+         ai->lastchange = vg_csbs[i].where;
+         return True;
+      }
+   }
+
+   /* No match on the stack.  Perhaps it's a general block ? */
+   for (i = 0; i < vg_cgb_used; i++) {
+      if (vg_cgbs[i].kind == CG_NotInUse) 
+         continue;
+      if (VG_(addr_is_in_block)(a, vg_cgbs[i].start, vg_cgbs[i].size)) {
+         ai->akind = UserG;
+         ai->blksize = vg_cgbs[i].size;
+         ai->rwoffset  = (Int)(a) - (Int)(vg_cgbs[i].start);
+         ai->lastchange = vg_cgbs[i].where;
+         return True;
+      }
+   }
+   return False;
+}
+
+
+void SK_(delete_client_stack_blocks_following_ESP_change) ( void )
+{
+   Addr newESP = VG_(get_stack_pointer)();
+
+   while (vg_csb_used > 0 
+          && vg_csbs[vg_csb_used-1].start + vg_csbs[vg_csb_used-1].size 
+             <= newESP) {
+      vg_csb_used--;
+      vg_csb_discards++;
+      if (VG_(clo_verbosity) > 2)
+         VG_(printf)("discarding stack block %p for %d\n", 
+            (void*)vg_csbs[vg_csb_used].start, 
+            vg_csbs[vg_csb_used].size);
+   }
+}
+
+
+UInt SK_(handle_client_request) ( ThreadState* tst, UInt* arg_block )
+{
+   Int   i;
+   Bool  ok;
+   Addr  bad_addr;
+   UInt* arg = arg_block;
+
+   switch (arg[0]) {
+      case VG_USERREQ__CHECK_WRITABLE: /* check writable */
+         ok = SK_(check_writable) ( arg[1], arg[2], &bad_addr );
+         if (!ok)
+            SK_(record_user_error) ( tst, bad_addr, True );
+         return ok ? (UInt)NULL : bad_addr;
+
+      case VG_USERREQ__CHECK_READABLE: /* check readable */
+         ok = SK_(check_readable) ( arg[1], arg[2], &bad_addr );
+         if (!ok)
+            SK_(record_user_error) ( tst, bad_addr, False );
+         return ok ? (UInt)NULL : bad_addr;
+
+      case VG_USERREQ__DO_LEAK_CHECK:
+         SK_(detect_memory_leaks)();
+         return 0; /* return value is meaningless */
+
+      case VG_USERREQ__MAKE_NOACCESS: /* make no access */
+         i = vg_alloc_client_block();
+         /* VG_(printf)("allocated %d %p\n", i, vg_cgbs); */
+         vg_cgbs[i].kind  = CG_NoAccess;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_noaccess) ( arg[1], arg[2] );
+         return i;
+
+      case VG_USERREQ__MAKE_WRITABLE: /* make writable */
+         i = vg_alloc_client_block();
+         vg_cgbs[i].kind  = CG_Writable;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_writable) ( arg[1], arg[2] );
+         return i;
+
+      case VG_USERREQ__MAKE_READABLE: /* make readable */
+         i = vg_alloc_client_block();
+         vg_cgbs[i].kind  = CG_Readable;
+         vg_cgbs[i].start = arg[1];
+         vg_cgbs[i].size  = arg[2];
+         vg_cgbs[i].where = VG_(get_ExeContext) ( tst );
+         SK_(make_readable) ( arg[1], arg[2] );
+         return i;
+         
+      case VG_USERREQ__DISCARD: /* discard */
+         if (vg_cgbs == NULL 
+             || arg[2] >= vg_cgb_used || vg_cgbs[arg[2]].kind == CG_NotInUse)
+            return 1;
+         vg_assert(arg[2] >= 0 && arg[2] < vg_cgb_used);
+         vg_cgbs[arg[2]].kind = CG_NotInUse;
+         vg_cgb_discards++;
+         return 0;
+
+      case VG_USERREQ__MAKE_NOACCESS_STACK: /* make noaccess stack block */
+         vg_add_client_stack_block ( tst, arg[1], arg[2] );
+         return 0;
+
+      default:
+         VG_(message)(Vg_UserMsg, 
+                      "Warning: unknown memcheck client request code %d",
+                      arg[0]);
+         return 1;
+   }
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_clientreqs.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_memcheck_errcontext.c b/vg_memcheck_errcontext.c
new file mode 100644
index 0000000..81f420c
--- /dev/null
+++ b/vg_memcheck_errcontext.c
@@ -0,0 +1,610 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: management of memory error        ---*/
+/*--- messages.                                                    ---*/
+/*---                                     vg_memcheck_errcontext.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/*------------------------------------------------------------*/
+/*--- Defns                                                ---*/
+/*------------------------------------------------------------*/
+
+/* This many bytes below %ESP are considered addressable if we're
+   doing the --workaround-gcc296-bugs hack. */
+#define VG_GCC296_BUG_STACK_SLOP 1024
+
+
+typedef 
+   enum { 
+      /* Bad syscall params */
+      ParamSupp,
+      /* Memory errors in core (pthread ops, signal handling) */
+      CoreMemSupp,
+      /* Use of invalid values of given size */
+      Value0Supp, Value1Supp, Value2Supp, Value4Supp, Value8Supp, 
+      /* Invalid read/write attempt at given size */
+      Addr1Supp, Addr2Supp, Addr4Supp, Addr8Supp,
+      /* Invalid or mismatching free */
+      FreeSupp
+   } 
+   MemCheckSuppKind;
+
+/* What kind of error it is. */
+typedef 
+   enum { ValueErr,
+          CoreMemErr,
+          AddrErr, 
+          ParamErr, UserErr,  /* behaves like an anonymous ParamErr */
+          FreeErr, FreeMismatchErr
+   }
+   MemCheckErrorKind;
+
+/* What kind of memory access is involved in the error? */
+typedef
+   enum { ReadAxs, WriteAxs, ExecAxs }
+   AxsKind;
+
+/* Extra context for memory errors */
+typedef
+   struct {
+      /* AddrErr */
+      AxsKind axskind;
+      /* AddrErr, ValueErr */
+      Int size;
+      /* AddrErr, FreeErr, FreeMismatchErr, ParamErr, UserErr */
+      AddrInfo addrinfo;
+      /* ParamErr, UserErr, CoreMemErr */
+      Bool isWrite;
+   }
+   MemCheckError;
+
+/*------------------------------------------------------------*/
+/*--- Comparing and printing errors                        ---*/
+/*------------------------------------------------------------*/
+
+static __inline__
+void clear_AddrInfo ( AddrInfo* ai )
+{
+   ai->akind      = Unknown;
+   ai->blksize    = 0;
+   ai->rwoffset   = 0;
+   ai->lastchange = NULL;
+   ai->stack_tid  = VG_INVALID_THREADID;
+   ai->maybe_gcc  = False;
+}
+
+static __inline__
+void clear_MemCheckError ( MemCheckError* err_extra )
+{
+   err_extra->axskind   = ReadAxs;
+   err_extra->size      = 0;
+   clear_AddrInfo ( &err_extra->addrinfo );
+   err_extra->isWrite   = False;
+}
+
+__attribute__ ((unused))
+static Bool eq_AddrInfo ( VgRes res, AddrInfo* ai1, AddrInfo* ai2 )
+{
+   if (ai1->akind != Undescribed 
+       && ai2->akind != Undescribed
+       && ai1->akind != ai2->akind) 
+      return False;
+   if (ai1->akind == Freed || ai1->akind == Mallocd) {
+      if (ai1->blksize != ai2->blksize)
+         return False;
+      if (!VG_(eq_ExeContext)(res, ai1->lastchange, ai2->lastchange))
+         return False;
+   }
+   return True;
+}
+
+/* Compare error contexts, to detect duplicates.  Note that if they
+   are otherwise the same, the faulting addrs and associated rwoffsets
+   are allowed to be different.  */
+
+Bool SK_(eq_SkinError) ( VgRes res,
+                          SkinError* e1, SkinError* e2 )
+{
+   MemCheckError* e1_extra = e1->extra;
+   MemCheckError* e2_extra = e2->extra;
+   
+   switch (e1->ekind) {
+      case CoreMemErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)   return False;
+         if (e2->ekind != CoreMemErr)                  return False; 
+         if (e1->string == e2->string)                 return True;
+         if (0 == VG_(strcmp)(e1->string, e2->string)) return True;
+         return False;
+
+      case UserErr:
+      case ParamErr:
+         if (e1_extra->isWrite != e2_extra->isWrite)
+            return False;
+         if (e1->ekind == ParamErr 
+             && 0 != VG_(strcmp)(e1->string, e2->string))
+            return False;
+         return True;
+
+      case FreeErr:
+      case FreeMismatchErr:
+         /* JRS 2002-Aug-26: comparing addrs seems overkill and can
+            cause excessive duplication of errors.  Not even AddrErr
+            below does that.  So don't compare either the .addr field
+            or the .addrinfo fields. */
+         /* if (e1->addr != e2->addr) return False; */
+         /* if (!eq_AddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+               return False;
+         */
+         return True;
+
+      case AddrErr:
+         /* if (e1_extra->axskind != e2_extra->axskind) return False; */
+         if (e1_extra->size != e2_extra->size) return False;
+         /*
+         if (!eq_AddrInfo(res, &e1_extra->addrinfo, &e2_extra->addrinfo)) 
+            return False;
+         */
+         return True;
+
+      case ValueErr:
+         if (e1_extra->size != e2_extra->size) return False;
+         return True;
+
+      default: 
+         VG_(printf)("Error:\n  unknown MemCheck error code %d\n", e1->ekind);
+         VG_(panic)("unknown error code in SK_(eq_SkinError)");
+   }
+}
+
+static void pp_AddrInfo ( Addr a, AddrInfo* ai )
+{
+   switch (ai->akind) {
+      case Stack: 
+         VG_(message)(Vg_UserMsg, 
+                      "   Address 0x%x is on thread %d's stack", 
+                      a, ai->stack_tid);
+         break;
+      case Unknown:
+         if (ai->maybe_gcc) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is just below %%esp.  Possibly a bug in GCC/G++",
+               a);
+            VG_(message)(Vg_UserMsg, 
+               "   v 2.96 or 3.0.X.  To suppress, use: --workaround-gcc296-bugs=yes");
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is not stack'd, malloc'd or free'd", a);
+         }
+         break;
+      case Freed: case Mallocd: case UserG: case UserS: {
+         UInt delta;
+         UChar* relative;
+         if (ai->rwoffset < 0) {
+            delta    = (UInt)(- ai->rwoffset);
+            relative = "before";
+         } else if (ai->rwoffset >= ai->blksize) {
+            delta    = ai->rwoffset - ai->blksize;
+            relative = "after";
+         } else {
+            delta    = ai->rwoffset;
+            relative = "inside";
+         }
+         if (ai->akind == UserS) {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a %d-byte stack red-zone created",
+               a, delta, relative, 
+               ai->blksize );
+	 } else {
+            VG_(message)(Vg_UserMsg, 
+               "   Address 0x%x is %d bytes %s a block of size %d %s",
+               a, delta, relative, 
+               ai->blksize,
+               ai->akind==Mallocd ? "alloc'd" 
+                  : ai->akind==Freed ? "free'd" 
+                                     : "client-defined");
+         }
+         VG_(pp_ExeContext)(ai->lastchange);
+         break;
+      }
+      default:
+         VG_(panic)("pp_AddrInfo");
+   }
+}
+
+void SK_(pp_SkinError) ( SkinError* err, void (*pp_ExeContext)(void) )
+{
+   MemCheckError* err_extra = err->extra;
+
+   switch (err->ekind) {
+      case CoreMemErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "%s contains unaddressable byte(s)", err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "%s contains uninitialised or unaddressable byte(s)",
+                err->string);
+         }
+         pp_ExeContext();
+         break;
+      
+      case ValueErr:
+         if (err_extra->size == 0) {
+             VG_(message)(
+                Vg_UserMsg,
+                "Conditional jump or move depends on uninitialised value(s)");
+         } else {
+             VG_(message)(Vg_UserMsg,
+                          "Use of uninitialised value of size %d",
+                          err_extra->size);
+         }
+         pp_ExeContext();
+         break;
+
+      case AddrErr:
+         switch (err_extra->axskind) {
+            case ReadAxs:
+               VG_(message)(Vg_UserMsg, "Invalid read of size %d", 
+                                        err_extra->size ); 
+               break;
+            case WriteAxs:
+               VG_(message)(Vg_UserMsg, "Invalid write of size %d", 
+                                        err_extra->size ); 
+               break;
+            case ExecAxs:
+               VG_(message)(Vg_UserMsg, "Jump to the invalid address "
+                                        "stated on the next line");
+               break;
+            default: 
+               VG_(panic)("pp_SkinError(axskind)");
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case FreeErr:
+         VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]");
+         /* fall through */
+      case FreeMismatchErr:
+         if (err->ekind == FreeMismatchErr)
+            VG_(message)(Vg_UserMsg, 
+                         "Mismatched free() / delete / delete []");
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case ParamErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Syscall param %s contains unaddressable byte(s)",
+                err->string );
+         } else {
+            VG_(message)(Vg_UserMsg, 
+                "Syscall param %s contains uninitialised or "
+                "unaddressable byte(s)",
+            err->string);
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      case UserErr:
+         if (err_extra->isWrite) {
+            VG_(message)(Vg_UserMsg, 
+               "Unaddressable byte(s) found during client check request");
+         } else {
+            VG_(message)(Vg_UserMsg, 
+               "Uninitialised or "
+               "unaddressable byte(s) found during client check request");
+         }
+         pp_ExeContext();
+         pp_AddrInfo(err->addr, &err_extra->addrinfo);
+         break;
+
+      default: 
+         VG_(printf)("Error:\n  unknown MemCheck error code %d\n", err->ekind);
+         VG_(panic)("unknown error code in SK_(pp_SkinError)");
+   }
+}
+
+/*------------------------------------------------------------*/
+/*--- Recording errors                                     ---*/
+/*------------------------------------------------------------*/
+
+/* Describe an address as best you can, for error messages,
+   putting the result in ai. */
+
+/* Fill in *ai with the best available description of address 'a',
+   trying the possible sources from most to least specific:
+   client-defined blocks, thread stacks, recently-freed blocks,
+   currently-malloc'd blocks, and finally Unknown. */
+static void describe_addr ( Addr a, AddrInfo* ai )
+{
+   ShadowChunk* sc;
+   Bool         ok;
+   ThreadId     tid;
+
+   /* Nested functions, yeah.  Need the lexical scoping of 'a'. */ 
+   /* (GNU C extension: these close over 'a' from the enclosing
+      frame.) */
+
+   /* Closure for searching thread stacks */
+   Bool addr_is_in_bounds(Addr stack_min, Addr stack_max)
+   {
+      return (stack_min <= a && a <= stack_max);
+   }
+   /* Closure for searching malloc'd and free'd lists */
+   Bool addr_is_in_block(ShadowChunk *sh_ch)
+   {
+      return VG_(addr_is_in_block) ( a, sh_ch->data, sh_ch->size );
+   }
+
+   /* Perhaps it's a user-def'd block ? */
+   ok = SK_(client_perm_maybe_describe)( a, ai );
+   if (ok)
+      return;
+   /* Perhaps it's on a thread's stack? */
+   tid = VG_(any_matching_thread_stack)(addr_is_in_bounds);
+   if (tid != VG_INVALID_THREADID) {
+      ai->akind     = Stack;
+      ai->stack_tid = tid;
+      return;
+   }
+   /* Search for a recently freed block which might bracket it. */
+   sc = SK_(any_matching_freed_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Freed;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      /* skin_extra[0] carries an ExeContext* -- presumably where the
+         block was freed. */
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   }
+   /* Search for a currently malloc'd block which might bracket it. */
+   sc = VG_(any_matching_mallocd_ShadowChunks)(addr_is_in_block);
+   if (NULL != sc) {
+      ai->akind      = Mallocd;
+      ai->blksize    = sc->size;
+      ai->rwoffset   = (Int)(a) - (Int)(sc->data);
+      /* skin_extra[0] carries an ExeContext* -- presumably where the
+         block was allocated. */
+      ai->lastchange = (ExeContext*)sc->skin_extra[0];
+      return;
+   } 
+   /* Clueless ... */
+   ai->akind = Unknown;
+   return;
+}
+
+
+/* Creates a copy of the err_extra, updates the copy with address info if
+   necessary, sticks the copy into the SkinError. */
+void SK_(dup_extra_and_update)(SkinError* err)
+{
+   MemCheckError* err_extra;
+
+   /* Shallow-copy the caller's extra info into heap storage; the
+      SkinError outlives the caller's stack copy. */
+   err_extra  = VG_(malloc)(sizeof(MemCheckError));
+   *err_extra = *((MemCheckError*)err->extra);
+
+   /* Address description was deferred (Undescribed); do it now. */
+   if (err_extra->addrinfo.akind == Undescribed)
+      describe_addr ( err->addr, &(err_extra->addrinfo) );
+
+   err->extra = err_extra;
+}
+
+/* These two are called from generated code. */
+/* Record a use of uninitialised value(s) of the given size.  A size
+   of 0 denotes a conditional jump/move on uninitialised values (see
+   the ValueErr printing above). */
+void SK_(record_value_error) ( Int size )
+{
+   MemCheckError err_extra;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.size = size;
+   VG_(maybe_record_error)( NULL, ValueErr, /*addr*/0, /*s*/NULL, &err_extra );
+}
+
+/* Is this address within some small distance below %ESP?  Used only
+   for the --workaround-gcc296-bugs kludge. */
+Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
+{
+   /* True iff aa lies strictly below esp, within the slop margin of
+      VG_GCC296_BUG_STACK_SLOP bytes. */
+   if ((UInt)esp > (UInt)aa
+       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
+      return True;
+   else
+      return False;
+}
+
+/* Record an invalid read or write of 'size' bytes at address 'a'. */
+void SK_(record_address_error) ( Addr a, Int size, Bool isWrite )
+{
+   MemCheckError err_extra;
+   Bool          just_below_esp;
+
+   just_below_esp 
+      = VG_(is_just_below_ESP)( VG_(get_stack_pointer)(), a );
+
+   /* If this is caused by an access immediately below %ESP, and the
+      user asks nicely, we just ignore it. */
+   if (SK_(clo_workaround_gcc296_bugs) && just_below_esp)
+      return;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.axskind = isWrite ? WriteAxs : ReadAxs;
+   err_extra.size    = size;
+   err_extra.addrinfo.akind     = Undescribed;
+   /* Remember the below-%ESP status for the error printer. */
+   err_extra.addrinfo.maybe_gcc = just_below_esp;
+   VG_(maybe_record_error)( NULL, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+/* These ones are called from non-generated code */
+
+/* This is for memory errors in pthread functions, as opposed to pthread API
+   errors which are found by the core. */
+void SK_(record_core_mem_error) ( ThreadState* tst, Bool isWrite, Char* msg )
+{
+   MemCheckError err_extra;
+
+   clear_MemCheckError( &err_extra );
+   err_extra.isWrite = isWrite;
+   /* 'msg' travels in the error's string field; no address info. */
+   VG_(maybe_record_error)( tst, CoreMemErr, /*addr*/0, msg, &err_extra );
+}
+
+/* Record an unaddressable/uninitialised syscall parameter; 'msg'
+   names the parameter, 'a' is the offending address. */
+void SK_(record_param_error) ( ThreadState* tst, Addr a, Bool isWrite, 
+                               Char* msg )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+   clear_MemCheckError( &err_extra );
+   /* Mark for lazy description in SK_(dup_extra_and_update). */
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite = isWrite;
+   VG_(maybe_record_error)( tst, ParamErr, a, msg, &err_extra );
+}
+
+/* Record a jump to the invalid address 'a' (ExecAxs access kind). */
+void SK_(record_jump_error) ( ThreadState* tst, Addr a )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.axskind = ExecAxs;
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, AddrErr, a, /*s*/NULL, &err_extra );
+}
+
+/* Record an invalid free()/delete/delete[] of address 'a'. */
+void SK_(record_free_error) ( ThreadState* tst, Addr a ) 
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeErr, a, /*s*/NULL, &err_extra );
+}
+
+/* Record a mismatched free()/delete/delete[] of address 'a'. */
+void SK_(record_freemismatch_error) ( ThreadState* tst, Addr a )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   VG_(maybe_record_error)( tst, FreeMismatchErr, a, /*s*/NULL, &err_extra );
+}
+
+/* Record a bad byte found by a client check request at address 'a'. */
+void SK_(record_user_error) ( ThreadState* tst, Addr a, Bool isWrite )
+{
+   MemCheckError err_extra;
+
+   vg_assert(NULL != tst);
+
+   clear_MemCheckError( &err_extra );
+   err_extra.addrinfo.akind = Undescribed;
+   err_extra.isWrite        = isWrite;
+   VG_(maybe_record_error)( tst, UserErr, a, /*s*/NULL, &err_extra );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Suppressions                                         ---*/
+/*------------------------------------------------------------*/
+
+/* NULL-safe string equality: if either argument is NULL the result
+   is False (two NULLs are NOT considered equal). */
+#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \
+                      && VG_(strcmp)((s1),(s2))==0)
+
+/* Map a suppression-kind name from a suppressions file to this
+   skin's SuppKind.  Returns False for names we don't recognise. */
+Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind )
+{
+   if      (STREQ(name, "Param"))   *skind = ParamSupp;
+   else if (STREQ(name, "CoreMem")) *skind = CoreMemSupp;
+   else if (STREQ(name, "Value0"))  *skind = Value0Supp; /* backwards compat */ 
+   else if (STREQ(name, "Cond"))    *skind = Value0Supp;
+   else if (STREQ(name, "Value1"))  *skind = Value1Supp;
+   else if (STREQ(name, "Value2"))  *skind = Value2Supp;
+   else if (STREQ(name, "Value4"))  *skind = Value4Supp;
+   else if (STREQ(name, "Value8"))  *skind = Value8Supp;
+   else if (STREQ(name, "Addr1"))   *skind = Addr1Supp;
+   else if (STREQ(name, "Addr2"))   *skind = Addr2Supp;
+   else if (STREQ(name, "Addr4"))   *skind = Addr4Supp;
+   else if (STREQ(name, "Addr8"))   *skind = Addr8Supp;
+   else if (STREQ(name, "Free"))    *skind = FreeSupp;
+   else 
+      return False;
+
+   return True;
+}
+
+/* Read any skin-specific extra lines of a suppression entry.  Only
+   Param suppressions carry one (the syscall parameter name), which
+   is duplicated into s->string.  Returns False on unexpected EOF. */
+Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf, 
+                                         SkinSupp *s )
+{
+   Bool eof;
+
+   if (s->skind == ParamSupp) {
+      eof = VG_(getLine) ( fd, buf, nBuf );
+      if (eof) return False;
+      s->string = VG_(strdup)(buf);
+   }
+   return True;
+}
+
+/* Decide whether suppression 'su' covers error 'err'.  Param and
+   CoreMem suppressions also match on the error string; Value/Addr
+   kinds match on the access size; Free covers both free errors. */
+extern Bool SK_(error_matches_suppression)(SkinError* err, SkinSupp* su)
+{
+   UInt su_size;
+   MemCheckError* err_extra = err->extra;
+
+   switch (su->skind) {
+      case ParamSupp:
+         return (err->ekind == ParamErr && STREQ(su->string, err->string));
+
+      case CoreMemSupp:
+         return (err->ekind == CoreMemErr && STREQ(su->string, err->string));
+
+      case Value0Supp: su_size = 0; goto value_case;
+      case Value1Supp: su_size = 1; goto value_case;
+      case Value2Supp: su_size = 2; goto value_case;
+      case Value4Supp: su_size = 4; goto value_case;
+      case Value8Supp: su_size = 8; goto value_case;
+      value_case:
+         return (err->ekind == ValueErr && err_extra->size == su_size);
+
+      case Addr1Supp: su_size = 1; goto addr_case;
+      case Addr2Supp: su_size = 2; goto addr_case;
+      case Addr4Supp: su_size = 4; goto addr_case;
+      case Addr8Supp: su_size = 8; goto addr_case;
+      addr_case:
+         /* Fixed: this previously tested `err_extra->size != su_size',
+            which made an AddrN suppression match every address error
+            EXCEPT those of size N -- exactly backwards.  Match on
+            equality, as value_case above does. */
+         return (err->ekind == AddrErr && err_extra->size == su_size);
+
+      case FreeSupp:
+         return (err->ekind == FreeErr || err->ekind == FreeMismatchErr);
+
+      default:
+         VG_(printf)("Error:\n"
+                     "  unknown MemCheck suppression type %d\n", su->skind);
+         VG_(panic)("unknown suppression type in "
+                    "SK_(error_matches_suppression)");
+   }
+}
+
+#  undef STREQ
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_errcontext.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_memcheck_from_ucode.c b/vg_memcheck_from_ucode.c
new file mode 100644
index 0000000..82550b7
--- /dev/null
+++ b/vg_memcheck_from_ucode.c
@@ -0,0 +1,642 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: Generate code for skin-specific   ---*/
+/*--- UInstrs.                                                     ---*/
+/*---                                     vg_memcheck_from_ucode.c ---*/
+/*--------------------------------------------------------------------*/
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/*------------------------------------------------------------*/
+/*--- Renamings of frequently-used global functions.       ---*/
+/*------------------------------------------------------------*/
+
+/* Shorthands for the core's register-naming and codegen-trace
+   facilities, used heavily by the emitters below. */
+#define nameIReg  VG_(nameOfIntReg)
+#define nameISize VG_(nameOfIntSize)
+
+#define dis       VG_(print_codegen)
+
+/*------------------------------------------------------------*/
+/*--- Instruction emission -- turning final uinstrs back   ---*/
+/*--- into x86 code.                                       ---*/
+/*------------------------------------------------------------*/
+
+/* See the corresponding comment at the top of vg_from_ucode.c to find out
+ * how all this works */
+
+/*----------------------------------------------------*/
+/*--- v-size (4, or 2 with OSO) insn emitters      ---*/
+/*----------------------------------------------------*/
+
+/* Emit TESTv $lit, %reg.  sz must be 4, or 2 (with an operand-size
+   override prefix).  Sets the flags; the AND result is discarded. */
+static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg )
+{
+   VG_(newEmit)();
+   if (sz == 2) {
+      VG_(emitB) ( 0x66 );   /* operand-size override: 32 -> 16 bit */
+   } else {
+      vg_assert(sz == 4);
+   }
+   VG_(emitB) ( 0xF7 ); /* Grp3 Ev */
+   VG_(emit_amode_ereg_greg) ( reg, 0 /* Grp3 subopcode for TEST */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
+   if (dis)
+      VG_(printf)("\n\t\ttest%c $0x%x, %s\n", nameISize(sz), 
+                                            lit, nameIReg(sz,reg));
+}
+
+/* Emit TESTv $lit, off(%reg).  sz must be 4, or 2 (with an
+   operand-size override prefix).  Sets the flags; result discarded. */
+static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg )
+{
+   VG_(newEmit)();
+   if (sz == 2) {
+      VG_(emitB) ( 0x66 );   /* operand-size override: 32 -> 16 bit */
+   } else {
+      vg_assert(sz == 4);
+   }
+   VG_(emitB) ( 0xF7 ); /* Grp3 Ev */
+   VG_(emit_amode_offregmem_reg) ( off, reg, 0 /* Grp3 subopcode for TEST */ );
+   if (sz == 2) VG_(emitW) ( lit ); else VG_(emitL) ( lit );
+   if (dis)
+      /* Print the (unsigned) literal in hex, matching the format used
+         by emit_testv_lit_reg; `$%d' was inconsistent and signed. */
+      VG_(printf)("\n\t\ttest%c $0x%x, 0x%x(%s)\n", 
+                  nameISize(sz), lit, off, nameIReg(4,reg) );
+}
+
+/*----------------------------------------------------*/
+/*--- Instruction synthesisers                     ---*/
+/*----------------------------------------------------*/
+
+/* Synthesise a minimal test (and which discards result) of reg32
+   against lit.  It's always safe do simply
+      emit_testv_lit_reg ( 4, lit, reg32 )
+   but we try to do better when possible.
+*/
+static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 )
+{
+   if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) {
+      /* We can get away with a byte insn.  (Only registers 0..3 have
+         byte encodings for their low byte; 4..7 would select
+         %ah..%bh.) */
+      VG_(emit_testb_lit_reg) ( lit, reg32 );
+   }
+   else 
+   if ((lit & 0xFFFF0000) == 0) {
+      /* Literal fits in 16 bits; do a word insn. */
+      emit_testv_lit_reg ( 2, lit, reg32 );
+   }
+   else {
+      /* Totally general ... */
+      emit_testv_lit_reg ( 4, lit, reg32 );
+   }
+}
+
+/*----------------------------------------------------*/
+/*--- Top level of the uinstr -> x86 translation.  ---*/
+/*----------------------------------------------------*/
+
+/* Emit a C call to the size-specific LOADV helper: fetches the
+   shadow (V) value for the address in a_reg into tv_reg. */
+static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg,
+                          RRegSet regs_live_before,
+                          RRegSet regs_live_after )
+{
+   Addr helper;
+   UInt argv[] = { a_reg };
+   UInt tagv[] = { RealReg };
+
+   switch (sz) {
+      case 4: helper = (Addr) & SK_(helperc_LOADV4); break;
+      case 2: helper = (Addr) & SK_(helperc_LOADV2); break;
+      case 1: helper = (Addr) & SK_(helperc_LOADV1); break;
+      default: VG_(panic)("synth_LOADV");
+   }
+   VG_(synth_ccall) ( helper, 1, 1, argv, tagv, tv_reg,
+                      regs_live_before, regs_live_after );
+}
+
+
+/* Emit a C call to the size-specific STOREV helper: writes the
+   shadow (V) value tv_val (a RealReg or a Literal) for the address
+   held in a_reg. */
+static void synth_STOREV ( Int sz, Int tv_tag, Int tv_val, Int a_reg,
+                           RRegSet regs_live_before,
+                           RRegSet regs_live_after )
+{
+   Addr helper;
+   UInt argv[] = { a_reg,   tv_val };
+   Tag  tagv[] = { RealReg, tv_tag };
+
+   vg_assert(tv_tag == RealReg || tv_tag == Literal);
+   switch (sz) {
+      case 4: helper = (Addr) SK_(helperc_STOREV4); break;
+      case 2: helper = (Addr) SK_(helperc_STOREV2); break;
+      case 1: helper = (Addr) SK_(helperc_STOREV1); break;
+      default: VG_(panic)("synth_STOREV");
+   }
+   /* INVALID_REALREG: the store produces no result. */
+   VG_(synth_ccall) ( helper, 2, 2, argv, tagv, INVALID_REALREG,
+                      regs_live_before, regs_live_after );
+}
+
+
+/* Set reg's shadow to `defined' (0 bits) for the bottom sz bytes and
+   `undefined' (1 bits) elsewhere; sz==0 leaves only bit 0 defined.
+   (0 = defined, 1 = undefined -- see the tag-op comments below.) */
+static void synth_SETV ( Int sz, Int reg )
+{
+   UInt val;
+   switch (sz) {
+      case 4: val = 0x00000000; break;
+      case 2: val = 0xFFFF0000; break;
+      case 1: val = 0xFFFFFF00; break;
+      case 0: val = 0xFFFFFFFE; break;
+      default: VG_(panic)("synth_SETV");
+   }
+   VG_(emit_movv_lit_reg) ( 4, val, reg );
+}
+
+
+/* Test the V bits of an ArchReg's shadow (in memory, off %EBP) or of
+   a RealReg, and call the size-appropriate value_check<sz>_fail
+   helper if any tested bit is set (i.e. undefined). */
+static void synth_TESTV ( Int sz, Int tag, Int val )
+{
+   vg_assert(tag == ArchReg || tag == RealReg);
+   if (tag == ArchReg) {
+      switch (sz) {
+         case 4: 
+            emit_testv_lit_offregmem ( 
+               4, 0xFFFFFFFF, VG_(shadowRegOffset)(val), R_EBP );
+            break;
+         case 2: 
+            emit_testv_lit_offregmem ( 
+               4, 0x0000FFFF, VG_(shadowRegOffset)(val), R_EBP );
+            break;
+         case 1:
+            if (val < 4) {
+               emit_testv_lit_offregmem ( 
+                  4, 0x000000FF, VG_(shadowRegOffset)(val), R_EBP );
+            } else {
+               /* ArchRegs 4..7 denote %ah..%bh: test bits 8..15 of
+                  the corresponding dword shadow. */
+               emit_testv_lit_offregmem ( 
+                  4, 0x0000FF00, VG_(shadowRegOffset)(val-4), R_EBP );
+            }
+            break;
+         case 0: 
+            /* should never happen */
+         default: 
+            VG_(panic)("synth_TESTV(ArchReg)");
+      }
+   } else {
+      switch (sz) {
+         case 4:
+            /* Works, but holds the entire 32-bit literal, hence
+               generating a 6-byte insn.  We want to know if any bits
+               in the reg are set, but since this is for the full reg,
+               we might as well compare it against zero, which can be
+               done with a shorter insn. */
+            /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */
+            VG_(emit_cmpl_zero_reg) ( val );
+            break;
+         case 2:
+            synth_minimal_test_lit_reg ( 0x0000FFFF, val );
+            break;
+         case 1:
+            synth_minimal_test_lit_reg ( 0x000000FF, val );
+            break;
+         case 0:
+            synth_minimal_test_lit_reg ( 0x00000001, val );
+            break;
+         default: 
+            VG_(panic)("synth_TESTV(RealReg)");
+      }
+   }
+   /* Skip the 3-byte failure call when all tested bits were zero,
+      i.e. defined. */
+   VG_(emit_jcondshort_delta) ( CondZ, 3 );
+   VG_(synth_call) (
+      True, /* needed to guarantee that this insn is indeed 3 bytes long */
+      ( sz==4 
+      ? VG_(helper_offset)((Addr) & SK_(helper_value_check4_fail))
+      : ( sz==2 
+        ? VG_(helper_offset)((Addr) & SK_(helper_value_check2_fail))
+        : ( sz==1 
+          ? VG_(helper_offset)((Addr) & SK_(helper_value_check1_fail))
+          : VG_(helper_offset)((Addr) & SK_(helper_value_check0_fail)))))
+   );
+}
+
+
+/* Load the shadow of ArchReg 'arch' (from its slot off %EBP) into
+   RealReg 'reg', marking the bits outside the operand size as
+   undefined (all 1s). */
+static void synth_GETV ( Int sz, Int arch, Int reg )
+{
+   /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */
+   switch (sz) {
+      case 4: 
+         VG_(emit_movv_offregmem_reg) ( 4, VG_(shadowRegOffset)(arch),
+                                        R_EBP, reg );
+         break;
+      case 2: 
+         VG_(emit_movzwl_offregmem_reg) ( VG_(shadowRegOffset)(arch),
+                                          R_EBP, reg );
+         VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFF0000, reg );
+         break;
+      case 1: 
+         if (arch < 4) {
+            VG_(emit_movzbl_offregmem_reg) ( VG_(shadowRegOffset)(arch),
+                                             R_EBP, reg );
+         } else {
+            /* ArchRegs 4..7 denote %ah..%bh: byte 1 of the dword
+               shadow. */
+            VG_(emit_movzbl_offregmem_reg) ( VG_(shadowRegOffset)(arch-4)+1,
+                                             R_EBP, reg );
+         }
+         VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFFFF00, reg );
+         break;
+      default: 
+         VG_(panic)("synth_GETV");
+   }
+}
+
+
+/* Store a shadow value (a Literal or a RealReg) into ArchReg 'arch's
+   shadow slot off %EBP. */
+static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch )
+{
+   if (srcTag == Literal) {
+     /* PUTV with a Literal is only ever used to set the corresponding
+        ArchReg to `all valid'.  Should really be a kind of SETV. */
+      UInt lit = lit_or_reg;
+      switch (sz) {
+         case 4:
+            vg_assert(lit == 0x00000000);
+            VG_(emit_movv_lit_offregmem) ( 4, 0x00000000, 
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 2:
+            vg_assert(lit == 0xFFFF0000);
+            VG_(emit_movv_lit_offregmem) ( 2, 0x0000, 
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 1:
+            vg_assert(lit == 0xFFFFFF00);
+            if (arch < 4) {
+               VG_(emit_movb_lit_offregmem) ( 0x00, 
+                                         VG_(shadowRegOffset)(arch), R_EBP );
+            } else {
+               /* ArchRegs 4..7 denote %ah..%bh: byte 1 of the dword
+                  shadow. */
+               VG_(emit_movb_lit_offregmem) ( 0x00, 
+                                              VG_(shadowRegOffset)(arch-4)+1,
+                                              R_EBP );
+            }
+            break;
+         default: 
+            VG_(panic)("synth_PUTV(lit)");
+      }
+
+   } else {
+
+      UInt reg;
+      vg_assert(srcTag == RealReg);
+
+      /* Byte stores can only be encoded from registers 0..3, so
+         shuffle the value through %EAX when it lives elsewhere. */
+      if (sz == 1 && lit_or_reg >= 4) {
+         VG_(emit_swapl_reg_EAX) ( lit_or_reg );
+         reg = R_EAX;
+      } else {
+         reg = lit_or_reg;
+      }
+
+      if (sz == 1) vg_assert(reg < 4);
+
+      switch (sz) {
+         case 4:
+            VG_(emit_movv_reg_offregmem) ( 4, reg,
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 2:
+            VG_(emit_movv_reg_offregmem) ( 2, reg,
+                                      VG_(shadowRegOffset)(arch), R_EBP );
+            break;
+         case 1:
+            if (arch < 4) {
+               VG_(emit_movb_reg_offregmem) ( reg,
+                                         VG_(shadowRegOffset)(arch), R_EBP );
+	    } else {
+               VG_(emit_movb_reg_offregmem) ( reg,
+                                        VG_(shadowRegOffset)(arch-4)+1, R_EBP );
+            }
+            break;
+         default: 
+            VG_(panic)("synth_PUTV(reg)");
+      }
+
+      /* Undo the earlier swap, restoring %EAX and the source reg. */
+      if (sz == 1 && lit_or_reg >= 4) {
+         VG_(emit_swapl_reg_EAX) ( lit_or_reg );
+      }
+   }
+}
+
+
+/* Load the shadow of the flags (%EFLAGS) into RealReg 'reg'. */
+static void synth_GETVF ( Int reg )
+{
+   VG_(emit_movv_offregmem_reg) ( 4, VG_(shadowFlagsOffset)(), R_EBP, reg );
+   /* paranoia only; should be unnecessary ... */
+   /* VG_(emit_nonshiftopv_lit_reg) ( 4, OR, 0xFFFFFFFE, reg ); */
+}
+
+
+/* Store RealReg 'reg' into the shadow of the flags (%EFLAGS). */
+static void synth_PUTVF ( UInt reg )
+{
+   VG_(emit_movv_reg_offregmem) ( 4, reg, VG_(shadowFlagsOffset)(), R_EBP );
+}
+
+
+/* Emit code for a unary tag operation (PCast / Left / Widen) applied
+   in place to the shadow value in 'reg'.  regs_live_after is only
+   consulted by the Tag_Left cases, to find a scratch register. */
+static void synth_TAG1_op ( TagOp op, Int reg, RRegSet regs_live_after )
+{
+   switch (op) {
+
+      /* Scheme is
+            neg<sz> %reg          -- CF = %reg==0 ? 0 : 1
+            sbbl %reg, %reg       -- %reg = -CF
+            or 0xFFFFFFFE, %reg   -- invalidate all bits except lowest
+      */
+      case Tag_PCast40:
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+      case Tag_PCast20:
+         VG_(emit_unaryopv_reg)(2, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+      case Tag_PCast10:
+         /* Byte NEG is only encodable for regs 0..3; go via %EAX
+            otherwise. */
+         if (reg >= 4) {
+            VG_(emit_swapl_reg_EAX)(reg);
+            VG_(emit_unaryopb_reg)(NEG, R_EAX);
+            VG_(emit_swapl_reg_EAX)(reg);
+         } else {
+            VG_(emit_unaryopb_reg)(NEG, reg);
+         }
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFFFE, reg);
+         break;
+
+      /* Scheme is
+            andl $1, %reg -- %reg is 0 or 1
+            negl %reg -- %reg is 0 or 0xFFFFFFFF
+            and possibly an OR to invalidate unused bits.
+      */
+      case Tag_PCast04:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         break;
+      case Tag_PCast02:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_PCast01:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x00000001, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, reg);
+         break;
+
+      /* Scheme is
+            shl $24, %reg -- make irrelevant bits disappear
+            negl %reg             -- CF = %reg==0 ? 0 : 1
+            sbbl %reg, %reg       -- %reg = -CF
+            and possibly an OR to invalidate unused bits.
+      */
+      case Tag_PCast14:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         break;
+      case Tag_PCast12:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_PCast11:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_unaryopv_reg)(4, NEG, reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, SBB, reg, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, reg);
+         break;
+
+      /* We use any non-live reg (except %reg) as a temporary,
+         or push/pop %ebp if none available:
+            (%dead_reg = any dead reg, else %ebp)
+            (pushl %ebp if all regs live)
+            movl %reg, %dead_reg
+            negl %dead_reg
+            orl %dead_reg, %reg
+            (popl %ebp if all regs live)
+         This sequence turns out to be correct regardless of the 
+         operation width.
+      */
+      case Tag_Left4:
+      case Tag_Left2:
+      case Tag_Left1: {
+         UInt dead_reg = R_EBP;
+         Int  i, reg_of_i;
+
+         /* Look for a dead real register to use as scratch. */
+         for (i = 0; i < VG_MAX_REALREGS; i++) {
+            if (! IS_RREG_LIVE(i, regs_live_after)) {
+               reg_of_i = VG_(rankToRealRegNum)(i);
+               if (reg != reg_of_i) {
+                  dead_reg = reg_of_i;
+                  break;
+               }
+            }
+         }
+
+         if (R_EBP == dead_reg)
+            VG_(emit_pushv_reg)(4, dead_reg);
+         VG_(emit_movv_reg_reg)(4, reg, dead_reg);
+         VG_(emit_unaryopv_reg)(4, NEG, dead_reg);
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, dead_reg, reg);
+         if (R_EBP == dead_reg)
+            VG_(emit_popv_reg)(4, dead_reg);
+         break;
+      }
+
+      /* These are all fairly obvious; do the op and then, if
+         necessary, invalidate unused bits. */
+      case Tag_SWiden14:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 24, reg);
+         break;
+      case Tag_SWiden24:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 16, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 16, reg);
+         break;
+      case Tag_SWiden12:
+         VG_(emit_shiftopv_lit_reg)(4, SHL, 24, reg);
+         VG_(emit_shiftopv_lit_reg)(4, SAR, 24, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+      case Tag_ZWiden14:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x000000FF, reg);
+         break;
+      case Tag_ZWiden24:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x0000FFFF, reg);
+         break;
+      case Tag_ZWiden12:
+         VG_(emit_nonshiftopv_lit_reg)(4, AND, 0x000000FF, reg);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, reg);
+         break;
+
+      default:
+         VG_(panic)("synth_TAG1_op");
+   }
+}
+
+
+/* Emit code for a binary tag operation: combine the shadow value in
+   'regs' into the one in 'regd' (result left in 'regd'). */
+static void synth_TAG2_op ( TagOp op, Int regs, Int regd )
+{
+   switch (op) {
+
+      /* UifU is implemented by OR, since 1 means Undefined. */
+      case Tag_UifU4:
+      case Tag_UifU2:
+      case Tag_UifU1:
+      case Tag_UifU0:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         break;
+
+      /* DifD is implemented by AND, since 0 means Defined. */
+      case Tag_DifD4:
+      case Tag_DifD2:
+      case Tag_DifD1:
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         break;
+
+      /* ImproveAND(value, tags) = value OR tags.
+	 Defined (0) value 0s give defined (0); all other -> undefined (1).
+         value is in regs; tags is in regd. 
+         Be paranoid and invalidate unused bits; I don't know whether 
+         or not this is actually necessary. */
+      case Tag_ImproveAND4_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         break;
+      case Tag_ImproveAND2_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, regd);
+         break;
+      case Tag_ImproveAND1_TQ:
+         VG_(emit_nonshiftopv_reg_reg)(4, OR, regs, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, regd);
+         break;
+
+      /* ImproveOR(value, tags) = (not value) OR tags.
+	 Defined (0) value 1s give defined (0); all other -> undefined (1).
+         value is in regs; tags is in regd. 
+         To avoid trashing value, this is implemented (re de Morgan) as
+               not (value AND (not tags))
+         Be paranoid and invalidate unused bits; I don't know whether 
+         or not this is actually necessary. */
+      case Tag_ImproveOR4_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         break;
+      case Tag_ImproveOR2_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFF0000, regd);
+         break;
+      case Tag_ImproveOR1_TQ:
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_reg_reg)(4, AND, regs, regd);
+         VG_(emit_unaryopv_reg)(4, NOT, regd);
+         VG_(emit_nonshiftopv_lit_reg)(4, OR, 0xFFFFFF00, regd);
+         break;
+
+      default:
+         VG_(panic)("synth_TAG2_op");
+   }
+}
+
+/*----------------------------------------------------*/
+/*--- Generate code for a single UInstr.           ---*/
+/*----------------------------------------------------*/
+
+/* Generate x86 code for one skin-specific (extension) UInstr:
+   validate the operand tags and dispatch to the synth_* routines
+   above.  Panics on opcodes this skin does not define. */
+void SK_(emitExtUInstr) ( UInstr* u, RRegSet regs_live_before )
+{
+   switch (u->opcode) {
+
+      case SETV:
+         vg_assert(u->tag1 == RealReg);
+         synth_SETV ( u->size, u->val1 );
+         break;
+
+      case STOREV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
+         vg_assert(u->tag2 == RealReg);
+         synth_STOREV ( u->size, u->tag1, 
+                                 u->tag1==Literal ? u->lit32 : u->val1, 
+                                 u->val2,
+                        regs_live_before, u->regs_live_after );
+         break;
+
+      case LOADV:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->tag2 == RealReg);
+         /* Disabled: optionally prefetch the target address. */
+         if (0)
+            VG_(emit_AMD_prefetch_reg) ( u->val1 );
+         synth_LOADV ( u->size, u->val1, u->val2,
+                       regs_live_before, u->regs_live_after );
+         break;
+
+      case TESTV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg);
+         synth_TESTV(u->size, u->tag1, u->val1);
+         break;
+
+      case GETV:
+         vg_assert(u->tag1 == ArchReg);
+         vg_assert(u->tag2 == RealReg);
+         synth_GETV(u->size, u->val1, u->val2);
+         break;
+
+      case GETVF:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->size == 0);
+         synth_GETVF(u->val1);
+         break;
+
+      case PUTV:
+         vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
+         vg_assert(u->tag2 == ArchReg);
+         synth_PUTV(u->size, u->tag1, 
+                             u->tag1==Literal ? u->lit32 : u->val1, 
+                             u->val2 );
+         break;
+
+      case PUTVF:
+         vg_assert(u->tag1 == RealReg);
+         vg_assert(u->size == 0);
+         synth_PUTVF(u->val1);
+         break;
+
+      case TAG1:
+         synth_TAG1_op ( u->val3, u->val1, u->regs_live_after );
+         break;
+
+      case TAG2:
+         synth_TAG2_op ( u->val3, u->val1, u->val2 );
+         break;
+
+      default: 
+         VG_(printf)("emitExtUInstr: unhandled extension insn:\n");
+         VG_(ppUInstr)(0,u);
+         VG_(panic)("emitExtUInstr: unhandled extension opcode");
+   }
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                 vg_memcheck_from_ucode.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_memcheck_helpers.S b/vg_memcheck_helpers.S
new file mode 100644
index 0000000..515c873
--- /dev/null
+++ b/vg_memcheck_helpers.S
@@ -0,0 +1,62 @@
+##--------------------------------------------------------------------##
+##--- Support routines for the memory checker.                     ---##
+##---                                        vg_memcheck_helpers.S ---##
+##--------------------------------------------------------------------##
+
+/*
+  This file is part of Valgrind, an x86 protected-mode emulator 
+  designed for debugging and profiling binaries on x86-Unixes.
+
+  Copyright (C) 2000-2002 Julian Seward 
+     jseward@acm.org
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_constants.h"
+
+.global SK_(helper_value_check0_fail)
+SK_(helper_value_check0_fail):
+	pushal
+	call	SK_(helperc_value_check0_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check1_fail)
+SK_(helper_value_check1_fail):
+	pushal
+	call	SK_(helperc_value_check1_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check2_fail)
+SK_(helper_value_check2_fail):
+	pushal
+	call	SK_(helperc_value_check2_fail)
+	popal
+	ret
+
+.global SK_(helper_value_check4_fail)
+SK_(helper_value_check4_fail):
+	pushal
+	call	SK_(helperc_value_check4_fail)
+	popal
+	ret
+
+
+
diff --git a/vg_memcheck_include.h b/vg_memcheck_include.h
new file mode 100644
index 0000000..82bcae7
--- /dev/null
+++ b/vg_memcheck_include.h
@@ -0,0 +1,209 @@
+/*--------------------------------------------------------------------*/
+/*--- A header file for all parts of the MemCheck skin.            ---*/
+/*---                                        vg_memcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_MEMCHECK_INCLUDE_H
+#define __VG_MEMCHECK_INCLUDE_H
+
+#include "vg_skin.h"
+
+/* UCode extension for efficient memory checking operations */
+typedef
+   enum {
+      /* uinstrs which are not needed for mere translation of x86 code,
+         only for instrumentation of it. */
+      LOADV = DUMMY_FINAL_UOPCODE + 1,
+      STOREV,
+      GETV,
+      PUTV,
+      TESTV,
+      SETV, 
+      /* Get/set the v-bit (and it is only one bit) for the simulated
+         %eflags register. */
+      GETVF,
+      PUTVF,
+
+      /* Do a unary or binary tag op.  Only for post-instrumented
+         code.  For TAG1, first and only arg is a TempReg, and is both
+         arg and result reg.  For TAG2, first arg is src, second is
+         dst, in the normal way; both are TempRegs.  In both cases,
+         3rd arg is a RiCHelper with a Lit16 tag.  This indicates
+         which tag op to do. */
+      TAG1,
+      TAG2
+   }
+   MemCheckOpcode;
+
+
+/* Lists the names of value-tag operations used in instrumented
+   code.  These are the third argument to TAG1 and TAG2 uinsns. */
+typedef
+   enum { 
+     /* Unary. */
+     Tag_PCast40, Tag_PCast20, Tag_PCast10,
+     Tag_PCast01, Tag_PCast02, Tag_PCast04,
+
+     Tag_PCast14, Tag_PCast12, Tag_PCast11,
+
+     Tag_Left4, Tag_Left2, Tag_Left1,
+
+     Tag_SWiden14, Tag_SWiden24, Tag_SWiden12,
+     Tag_ZWiden14, Tag_ZWiden24, Tag_ZWiden12,
+
+     /* Binary; 1st is rd; 2nd is rd+wr */
+     Tag_UifU4, Tag_UifU2, Tag_UifU1, Tag_UifU0,
+     Tag_DifD4, Tag_DifD2, Tag_DifD1,
+
+     Tag_ImproveAND4_TQ, Tag_ImproveAND2_TQ, Tag_ImproveAND1_TQ,
+     Tag_ImproveOR4_TQ, Tag_ImproveOR2_TQ, Tag_ImproveOR1_TQ,
+     Tag_DebugFn
+   }
+   TagOp;
+
+/* The classification of a faulting address. */
+typedef 
+   enum { Undescribed, /* as-yet unclassified */
+          Stack, 
+          Unknown, /* classification yielded nothing useful */
+          Freed, Mallocd, 
+          UserG, UserS 
+   }
+   AddrKind;
+
+/* Records info about a faulting address. */
+typedef
+   struct {
+      /* ALL */
+      AddrKind akind;
+      /* Freed, Mallocd */
+      Int blksize;
+      /* Freed, Mallocd */
+      Int rwoffset;
+      /* Freed, Mallocd */
+      ExeContext* lastchange;
+      /* Stack */
+      ThreadId stack_tid;
+      /* True if is just-below %esp -- could be a gcc bug. */
+      Bool maybe_gcc;
+   }
+   AddrInfo;
+
+
+/*------------------------------------------------------------*/
+/*--- Skin-specific command line options + defaults        ---*/
+/*------------------------------------------------------------*/
+
+/* Allow loads from partially-valid addresses?  default: YES */
+extern Bool SK_(clo_partial_loads_ok);
+
+/* Max volume of the freed blocks queue. */
+extern Int SK_(clo_freelist_vol);
+
+/* Do leak check at exit?  default: NO */
+extern Bool SK_(clo_leak_check);
+
+/* How closely should we compare ExeContexts in leak records? default: 2 */
+extern VgRes SK_(clo_leak_resolution);
+
+/* In leak check, show reachable-but-not-freed blocks?  default: NO */
+extern Bool SK_(clo_show_reachable);
+
+/* Assume accesses immediately below %esp are due to gcc-2.96 bugs.
+ * default: NO*/
+extern Bool SK_(clo_workaround_gcc296_bugs);
+
+/* Shall we V-check addrs? (they are always A checked too)   default: YES */
+extern Bool SK_(clo_check_addrVs);
+
+/* DEBUG: clean up instrumented code?  default: YES */
+extern Bool SK_(clo_cleanup);
+
+
+/*------------------------------------------------------------*/
+/*--- Functions                                            ---*/
+/*------------------------------------------------------------*/
+
+// SSS: work out a consistent prefix convention here
+
+/* Functions defined in vg_memcheck_helpers.S */
+extern void SK_(helper_value_check4_fail) ( void );
+extern void SK_(helper_value_check2_fail) ( void );
+extern void SK_(helper_value_check1_fail) ( void );
+extern void SK_(helper_value_check0_fail) ( void );
+
+/* Functions defined in vg_memcheck.c */
+extern void SK_(helperc_STOREV4) ( UInt, Addr );
+extern void SK_(helperc_STOREV2) ( UInt, Addr );
+extern void SK_(helperc_STOREV1) ( UInt, Addr );
+   
+extern UInt SK_(helperc_LOADV1) ( Addr );
+extern UInt SK_(helperc_LOADV2) ( Addr );
+extern UInt SK_(helperc_LOADV4) ( Addr );
+
+extern void SK_(fpu_write_check) ( Addr addr, Int size );
+extern void SK_(fpu_read_check)  ( Addr addr, Int size );
+
+extern ShadowChunk* SK_(any_matching_freed_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ) );
+
+/* For client requests */
+extern void SK_(make_noaccess) ( Addr a, UInt len );
+extern void SK_(make_readable) ( Addr a, UInt len );
+extern void SK_(make_writable) ( Addr a, UInt len );
+
+extern Bool SK_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
+extern Bool SK_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
+
+extern void SK_(detect_memory_leaks) ( void );
+
+
+/* Functions defined in vg_memcheck_clientreqs.c */
+extern Bool SK_(client_perm_maybe_describe)( Addr a, AddrInfo* ai );
+extern void SK_(delete_client_stack_blocks_following_ESP_change) ( void );
+extern void SK_(show_client_block_stats) ( void );
+
+/* Functions defined in vg_memcheck_errcontext.c */
+extern void SK_(record_value_error)       ( Int size );
+extern void SK_(record_address_error)     ( Addr a, Int size, Bool isWrite );
+extern void SK_(record_core_mem_error)    ( ThreadState* tst, Bool isWrite,
+                                            Char* s );
+extern void SK_(record_param_error)       ( ThreadState* tst, Addr a,   
+                                            Bool isWriteLack, Char* msg );
+extern void SK_(record_jump_error)        ( ThreadState* tst, Addr a );
+extern void SK_(record_free_error)        ( ThreadState* tst, Addr a );
+extern void SK_(record_freemismatch_error)( ThreadState* tst, Addr a );
+extern void SK_(record_user_error)        ( ThreadState* tst, Addr a, 
+                                            Bool isWrite );
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                    vg_memcheck_include.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/vg_memcheck_translate.c b/vg_memcheck_translate.c
new file mode 100644
index 0000000..34f9643
--- /dev/null
+++ b/vg_memcheck_translate.c
@@ -0,0 +1,1470 @@
+/*--------------------------------------------------------------------*/
+/*--- Part of the MemCheck skin: instrument UCode to perform       ---*/
+/*--- memory checking operations.                                  ---*/
+/*---                                      vg_memcheck_translate.c ---*/
+/*--------------------------------------------------------------------*/
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_memcheck_include.h"
+
+/* ---------------------------------------------------------------------
+   Template functions for extending UCode
+   ------------------------------------------------------------------ */
+
+/* Compare this with the restrictions on core instructions in
+   vg_translate.c:VG_(saneUInstr)().  Everything general said there applies
+   here too.
+*/
+Bool SK_(saneExtUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u)
+{
+// SSS: duplicating these macros really sucks
+#  define LIT0 (u->lit32 == 0)
+#  define LIT1 (!(LIT0))
+#  define LITm (u->tag1 == Literal ? True : LIT0 )
+#  define SZ0 (u->size == 0)
+#  define SZi (u->size == 4 || u->size == 2 || u->size == 1)
+#  define SZj (u->size == 4 || u->size == 2 || u->size == 1 || u->size == 0)
+#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
+#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
+#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
+#  define A1  (u->tag1 == ArchReg)
+#  define A2  (u->tag2 == ArchReg)
+#  define L1  (u->tag1 == Literal && u->val1 == 0)
+#  define Ls1 (u->tag1 == Lit16)
+#  define Ls3 (u->tag3 == Lit16)
+#  define TRL1 (TR1 || L1)
+#  define TRA1 (TR1 || A1)
+#  define N2  (u->tag2 == NoValue)
+#  define N3  (u->tag3 == NoValue)
+#  define COND0    (u->cond         == 0)
+#  define EXTRA4b0 (u->extra4b      == 0)
+#  define SG_WD0   (u->signed_widen == 0)
+#  define JMPKIND0 (u->jmpkind      == 0)
+#  define CCALL0   (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
+                    ( beforeLiveness                                       \
+                    ? u->regs_live_after == ALL_RREGS_LIVE                 \
+                    : True ))
+#  define XOTHER   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+
+   Int n_lits = 0;
+   if (u->tag1 == Literal) n_lits++;
+   if (u->tag2 == Literal) n_lits++;
+   if (u->tag3 == Literal) n_lits++;
+   if (n_lits > 1) 
+      return False;
+
+   /* Fields not checked: val1, val2, val3 */
+
+   switch (u->opcode) {
+
+   /* Fields checked: lit32   size flags_r/w tag1   tag2   tag3    (rest) */
+   case LOADV:  return LIT0 && SZi && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case STOREV: return LITm && SZi && CC0 && TRL1 && TR2 &&  N3 && XOTHER;
+   case GETV:   return LIT0 && SZi && CC0 &&   A1 && TR2 &&  N3 && XOTHER;
+   case PUTV:   return LITm && SZi && CC0 && TRL1 &&  A2 &&  N3 && XOTHER;
+   case GETVF: 
+   case PUTVF:  return LIT0 && SZ0 && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case TESTV: 
+   case SETV:   return LIT0 && SZj && CC0 && TRA1 &&  N2 &&  N3 && XOTHER;
+   case TAG1:   return LIT0 && SZ0 && CC0 &&  TR1 &&  N2 && Ls3 && XOTHER;
+   case TAG2:   return LIT0 && SZ0 && CC0 &&  TR1 && TR2 && Ls3 && XOTHER;
+   default:
+      VG_(printf)("unhandled opcode: %u\n", u->opcode);
+      VG_(panic)("SK_(saneExtUInstr): unhandled opcode");
+   }
+#  undef LIT0
+#  undef LIT1
+#  undef LITm
+#  undef SZ0
+#  undef SZi
+#  undef SZj
+#  undef CC0
+#  undef TR1
+#  undef TR2
+#  undef A1
+#  undef A2
+#  undef L1
+#  undef Ls1
+#  undef Ls3
+#  undef TRL1
+#  undef TRA1
+#  undef N2
+#  undef N3
+#  undef COND0
+#  undef EXTRA4b0
+#  undef JMPKIND0
+#  undef CCALL0
+#  undef XOTHER
+}
+
+static Char* nameOfTagOp ( TagOp h )
+{
+   switch (h) {
+      case Tag_PCast40:        return "PCast40";
+      case Tag_PCast20:        return "PCast20";
+      case Tag_PCast10:        return "PCast10";
+      case Tag_PCast01:        return "PCast01";
+      case Tag_PCast02:        return "PCast02";
+      case Tag_PCast04:        return "PCast04";
+      case Tag_PCast14:        return "PCast14";
+      case Tag_PCast12:        return "PCast12";
+      case Tag_PCast11:        return "PCast11";
+      case Tag_Left4:          return "Left4";
+      case Tag_Left2:          return "Left2";
+      case Tag_Left1:          return "Left1";
+      case Tag_SWiden14:       return "SWiden14";
+      case Tag_SWiden24:       return "SWiden24";
+      case Tag_SWiden12:       return "SWiden12";
+      case Tag_ZWiden14:       return "ZWiden14";
+      case Tag_ZWiden24:       return "ZWiden24";
+      case Tag_ZWiden12:       return "ZWiden12";
+      case Tag_UifU4:          return "UifU4";
+      case Tag_UifU2:          return "UifU2";
+      case Tag_UifU1:          return "UifU1";
+      case Tag_UifU0:          return "UifU0";
+      case Tag_DifD4:          return "DifD4";
+      case Tag_DifD2:          return "DifD2";
+      case Tag_DifD1:          return "DifD1";
+      case Tag_ImproveAND4_TQ: return "ImproveAND4_TQ";
+      case Tag_ImproveAND2_TQ: return "ImproveAND2_TQ";
+      case Tag_ImproveAND1_TQ: return "ImproveAND1_TQ";
+      case Tag_ImproveOR4_TQ:  return "ImproveOR4_TQ";
+      case Tag_ImproveOR2_TQ:  return "ImproveOR2_TQ";
+      case Tag_ImproveOR1_TQ:  return "ImproveOR1_TQ";
+      case Tag_DebugFn:        return "DebugFn";
+      default: VG_(panic)("vg_nameOfTagOp");
+   }
+}
+
+
+Char* SK_(nameExtUOpcode)(Opcode opc)
+{
+   switch (opc) {
+      case GETVF:   return "GETVF";
+      case PUTVF:   return "PUTVF";
+      case TAG1:    return "TAG1";
+      case TAG2:    return "TAG2";
+      case LOADV:   return "LOADV";
+      case STOREV:  return "STOREV";
+      case GETV:    return "GETV";
+      case PUTV:    return "PUTV";
+      case TESTV:   return "TESTV";
+      case SETV:    return "SETV";
+      default:      
+         VG_(printf)("unhandled opcode: %u\n", opc);
+         VG_(panic)("SK_(nameExtUOpcode): unhandled case");
+   }
+}
+
+/* ---------------------------------------------------------------------
+   Debugging stuff.
+   ------------------------------------------------------------------ */
+
+void SK_(ppExtUInstr)(UInstr* u)
+{
+   switch (u->opcode) {
+
+      case TAG1:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(" = %s ( ", nameOfTagOp( u->val3 ));
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(" )");
+         break;
+
+      case TAG2:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 2, 4, False);
+         VG_(printf)(" = %s ( ", nameOfTagOp( u->val3 ));
+         VG_(ppUOperand)(u, 1, 4, False);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, 4, False);
+         VG_(printf)(" )");
+         break;
+
+      case STOREV: case LOADV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, u->opcode==LOADV);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, u->size, u->opcode==STOREV);
+         break;
+
+      case PUTVF: case GETVF:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, 0, False);
+         break;
+
+      case GETV: case PUTV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->opcode==PUTV ? 4 : u->size, False);
+         VG_(printf)(", ");
+         VG_(ppUOperand)(u, 2, u->opcode==GETV ? 4 : u->size, False);
+         break;
+
+      case TESTV: case SETV:
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, False);
+         break;
+
+      default:
+         VG_(printf)("unhandled opcode: %u\n", u->opcode);
+         VG_(panic)("SK_(ppExtUInstr): unhandled opcode");
+   }
+
+}
+
+Int SK_(getExtRegUsage)(UInstr* u, Tag tag, RegUse* arr)
+{
+#  define RD(ono)    VG_UINSTR_READS_REG(ono)
+#  define WR(ono)    VG_UINSTR_WRITES_REG(ono)
+
+   Int n = 0;
+   switch (u->opcode) {        
+
+      // JJJ: I don't understand this comment... what about reg alloc?  --njn
+
+      /* These sizes are only ever consulted when the instrumentation
+         code is being added, so the following can return
+         manifestly-bogus sizes. */
+
+      case TAG1:    RD(1); WR(1);        break;
+      case TAG2:    RD(1); RD(2); WR(2); break;
+      case LOADV:   RD(1); WR(2);        break;
+      case STOREV:  RD(1); RD(2);        break;
+      case GETV:    WR(2);               break;
+      case PUTV:    RD(1);               break;
+      case TESTV:   RD(1);               break;
+      case SETV:    WR(1);               break;
+      case PUTVF:   RD(1);               break;
+      case GETVF:   WR(1);               break;
+
+      default: 
+         VG_(printf)("unhandled opcode: %u\n", u->opcode);
+         VG_(panic)("SK_(getExtRegUsage): unhandled opcode");
+   }
+   return n;
+
+#  undef RD
+#  undef WR
+}
+
+/*------------------------------------------------------------*/
+/*--- New instrumentation machinery.                       ---*/
+/*------------------------------------------------------------*/
+
+#define uInstr1   VG_(newUInstr1)
+#define uInstr2   VG_(newUInstr2)
+#define uInstr3   VG_(newUInstr3)
+#define uLiteral  VG_(setLiteralField)
+#define uCCall    VG_(setCCallFields)
+#define newTemp   VG_(getNewTemp)
+#define newShadow VG_(getNewShadow)
+
+static
+TagOp get_Tag_ImproveOR_TQ ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_ImproveOR4_TQ;
+      case 2: return Tag_ImproveOR2_TQ;
+      case 1: return Tag_ImproveOR1_TQ;
+      default: VG_(panic)("get_Tag_ImproveOR_TQ");
+   }
+}
+
+
+static
+TagOp get_Tag_ImproveAND_TQ ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_ImproveAND4_TQ;
+      case 2: return Tag_ImproveAND2_TQ;
+      case 1: return Tag_ImproveAND1_TQ;
+      default: VG_(panic)("get_Tag_ImproveAND_TQ");
+   }
+}
+
+
+static
+TagOp get_Tag_Left ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_Left4;
+      case 2: return Tag_Left2;
+      case 1: return Tag_Left1;
+      default: VG_(panic)("get_Tag_Left");
+   }
+}
+
+
+static
+TagOp get_Tag_UifU ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_UifU4;
+      case 2: return Tag_UifU2;
+      case 1: return Tag_UifU1;
+      case 0: return Tag_UifU0;
+      default: VG_(panic)("get_Tag_UifU");
+   }
+}
+
+
+static
+TagOp get_Tag_DifD ( Int sz )
+{
+   switch (sz) {
+      case 4: return Tag_DifD4;
+      case 2: return Tag_DifD2;
+      case 1: return Tag_DifD1;
+      default: VG_(panic)("get_Tag_DifD");
+   }
+}
+
+
+static 
+TagOp get_Tag_PCast ( Int szs, Int szd )
+{
+   if (szs == 4 && szd == 0) return Tag_PCast40;
+   if (szs == 2 && szd == 0) return Tag_PCast20;
+   if (szs == 1 && szd == 0) return Tag_PCast10;
+   if (szs == 0 && szd == 1) return Tag_PCast01;
+   if (szs == 0 && szd == 2) return Tag_PCast02;
+   if (szs == 0 && szd == 4) return Tag_PCast04;
+   if (szs == 1 && szd == 4) return Tag_PCast14;
+   if (szs == 1 && szd == 2) return Tag_PCast12;
+   if (szs == 1 && szd == 1) return Tag_PCast11;
+   VG_(printf)("get_Tag_PCast(%d,%d)\n", szs, szd);
+   VG_(panic)("get_Tag_PCast");
+}
+
+
+static 
+TagOp get_Tag_Widen ( Bool syned, Int szs, Int szd )
+{
+   if (szs == 1 && szd == 2 && syned)  return Tag_SWiden12;
+   if (szs == 1 && szd == 2 && !syned) return Tag_ZWiden12;
+
+   if (szs == 1 && szd == 4 && syned)  return Tag_SWiden14;
+   if (szs == 1 && szd == 4 && !syned) return Tag_ZWiden14;
+
+   if (szs == 2 && szd == 4 && syned)  return Tag_SWiden24;
+   if (szs == 2 && szd == 4 && !syned) return Tag_ZWiden24;
+
+   VG_(printf)("get_Tag_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
+   VG_(panic)("get_Tag_Widen");
+}
+
+/* Pessimally cast the spec'd shadow from one size to another. */
+static 
+void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
+{
+   if (szs == 0 && szd == 0)
+      return;
+   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
+                        NoValue, 0, 
+                        Lit16,   get_Tag_PCast(szs,szd));
+}
+
+
+/* Create a signed or unsigned widen of the spec'd shadow from one
+   size to another.  The only allowed size transitions are 1->2, 1->4
+   and 2->4. */
+static 
+void create_Widen ( UCodeBlock* cb, Bool signed_widen,
+                    Int szs, Int szd, Int tempreg )
+{
+   if (szs == szd) return;
+   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
+                        NoValue, 0, 
+                        Lit16,   get_Tag_Widen(signed_widen,szs,szd));
+}
+
+
+/* Get the condition codes into a new shadow, at the given size. */
+static
+Int create_GETVF ( UCodeBlock* cb, Int sz )
+{
+   Int tt = newShadow(cb);
+   uInstr1(cb, GETVF, 0, TempReg, tt);
+   create_PCast(cb, 0, sz, tt);
+   return tt;
+}
+
+
+/* Save the condition codes from the spec'd shadow. */
+static
+void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
+{
+   if (sz == 0) {
+      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
+   } else { 
+      Int tt = newShadow(cb);
+      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
+      create_PCast(cb, sz, 0, tt);
+      uInstr1(cb, PUTVF, 0, TempReg, tt);
+   }
+}
+
+
+/* Do Left on the spec'd shadow. */
+static 
+void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
+{
+   uInstr3(cb, TAG1, 0, 
+               TempReg, tempreg,
+               NoValue, 0, 
+               Lit16, get_Tag_Left(sz));
+}
+
+
+/* Do UifU on ts and td, putting the result in td. */
+static 
+void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
+{
+   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
+               Lit16, get_Tag_UifU(sz));
+}
+
+
+/* Do DifD on ts and td, putting the result in td. */
+static 
+void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
+{
+   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
+               Lit16, get_Tag_DifD(sz));
+}
+
+
+/* Do HelpAND on value tval and tag tqqq, putting the result in
+   tqqq. */
+static 
+void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
+{
+   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
+               Lit16, get_Tag_ImproveAND_TQ(sz));
+}
+
+
+/* Do HelpOR on value tval and tag tqqq, putting the result in
+   tqqq. */
+static 
+void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
+{
+   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
+               Lit16, get_Tag_ImproveOR_TQ(sz));
+}
+
+
+/* Get the shadow for an operand described by (tag, val).  Emit code
+   to do this and return the identity of the shadow holding the
+   result.  The result tag is always copied into a new shadow, so it
+   can be modified without trashing the original.*/
+static
+Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, 
+                                     Int sz, Int tag, Int val )
+{
+   Int sh;
+   sh = newShadow(cb);
+   if (tag == TempReg) {
+      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
+      return sh;
+   }
+   if (tag == Literal) {
+      uInstr1(cb, SETV, sz, TempReg, sh);
+      return sh;
+   }
+   if (tag == ArchReg) {
+      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
+      return sh;
+   }
+   VG_(panic)("getOperandShadow");
+}
+
+/* Create and return an instrumented version of cb_in.  Free cb_in
+   before returning. */
+static UCodeBlock* memcheck_instrument ( UCodeBlock* cb_in )
+{
+   UCodeBlock* cb;
+   Int         i, j;
+   UInstr*     u_in;
+   Int         qs, qd, qt, qtt;
+   cb = VG_(allocCodeBlock)();
+   cb->nextTemp = cb_in->nextTemp;
+
+   for (i = 0; i < cb_in->used; i++) {
+      qs = qd = qt = qtt = INVALID_TEMPREG;
+      u_in = &cb_in->instrs[i];
+
+      switch (u_in->opcode) {
+
+         case NOP:
+            break;
+
+         case INCEIP:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Loads and stores.  Test the V bits for the address.  24
+            Mar 02: since the address is A-checked anyway, there's not
+            really much point in doing the V-check too, unless you
+            think that you might use addresses which are undefined but
+            still addressible.  Hence the optionalisation of the V
+            check.
+
+            The LOADV/STOREV does an addressibility check for the
+            address. */
+
+         case LOAD: 
+            if (SK_(clo_check_addrVs)) {
+               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            }
+            uInstr2(cb, LOADV, u_in->size, 
+                        TempReg, u_in->val1,
+                        TempReg, SHADOW(u_in->val2));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case STORE:
+            if (SK_(clo_check_addrVs)) {
+               uInstr1(cb, TESTV,  4, TempReg, SHADOW(u_in->val2));
+               uInstr1(cb, SETV,   4, TempReg, SHADOW(u_in->val2));
+            }
+            uInstr2(cb, STOREV, u_in->size,
+                        TempReg, SHADOW(u_in->val1), 
+                        TempReg, u_in->val2);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Moving stuff around.  Make the V bits follow accordingly,
+            but don't do anything else.  */
+
+         case GET:
+            uInstr2(cb, GETV, u_in->size,
+                        ArchReg, u_in->val1,
+                        TempReg, SHADOW(u_in->val2));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case PUT:
+            uInstr2(cb, PUTV, u_in->size, 
+                        TempReg, SHADOW(u_in->val1),
+                        ArchReg, u_in->val2);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case GETF:
+            /* This is not the smartest way to do it, but should work. */
+            qd = create_GETVF(cb, u_in->size);
+            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case PUTF:
+            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case MOV:
+            switch (u_in->tag1) {
+               case TempReg: 
+                  uInstr2(cb, MOV, 4,
+                              TempReg, SHADOW(u_in->val1),
+                              TempReg, SHADOW(u_in->val2));
+                  break;
+               case Literal: 
+                  uInstr1(cb, SETV, u_in->size, 
+                              TempReg, SHADOW(u_in->val2));
+                  break;
+               default: 
+                  VG_(panic)("memcheck_instrument: MOV");
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Special case of add, where one of the operands is a literal.
+            lea1(t) = t + some literal.
+            Therefore: lea1#(qa) = left(qa) 
+         */
+         case LEA1:
+            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
+            qs = SHADOW(u_in->val1);
+            qd = SHADOW(u_in->val2);
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
+            create_Left(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Another form of add.  
+            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
+                                and is 0,1,2 or 3.
+            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
+            Note, subtly, that the shift puts zeroes at the bottom of qt,
+            meaning Valid, since the corresponding shift of tt puts 
+            zeroes at the bottom of tb.
+         */
+         case LEA2: {
+            Int shift;
+            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
+            switch (u_in->extra4b) {
+               case 1: shift = 0; break;
+               case 2: shift = 1; break;
+               case 4: shift = 2; break;
+               case 8: shift = 3; break;
+               default: VG_(panic)( "memcheck_instrument(LEA2)" );
+            }
+            qs = SHADOW(u_in->val1);
+            qt = SHADOW(u_in->val2);
+            qd = SHADOW(u_in->val3);
+            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
+            if (shift > 0) {
+               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
+               uLiteral(cb, shift);
+            }
+            create_UifU(cb, 4, qs, qd);
+            create_Left(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
+         case INC: case DEC:
+            qd = SHADOW(u_in->val1);
+            create_Left(cb, u_in->size, qd);
+            if (u_in->flags_w != FlagsEmpty)
+               create_PUTVF(cb, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* This is a HACK (approximation :-) */
+         /* rcl#/rcr#(qs,qd) 
+               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
+                 eflags# = q0
+                 qd =pcast-0-sz(q0)
+            Ie, cast everything down to a single bit, then back up.
+            This assumes that any bad bits infect the whole word and 
+            the eflags.
+         */
+         case RCL: case RCR:
+	    vg_assert(u_in->flags_r != FlagsEmpty);
+            /* The following assertion looks like it makes sense, but is
+               actually wrong.  Consider this:
+                  rcll    %eax
+                  imull   %eax, %eax
+               The rcll writes O and C but so does the imull, so the O and C 
+               write of the rcll is annulled by the prior improvement pass.
+               Noticed by Kevin Ryde <user42@zip.com.au>
+            */
+	    /* vg_assert(u_in->flags_w != FlagsEmpty); */
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            /* We can safely modify qs; cast it to 0-size. */
+            create_PCast(cb, u_in->size, 0, qs);
+            qd = SHADOW(u_in->val2);
+            create_PCast(cb, u_in->size, 0, qd);
+            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
+            create_UifU(cb, 0, qs, qd);
+            /* qs is now free; reuse it for the flag definedness. */
+            qs = create_GETVF(cb, 0);
+            create_UifU(cb, 0, qs, qd);
+            create_PUTVF(cb, 0, qd);
+            create_PCast(cb, 0, u_in->size, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* for OP in shl shr sar rol ror
+            (qs is shift count#, qd is value to be OP#d)
+            OP(ts,td)
+            OP#(qs,qd)
+               = pcast-1-sz(qs) `UifU` OP(ts,qd)
+            So we apply OP to the tag bits too, and then UifU with
+            the shift count# to take account of the possibility of it
+            being undefined.
+            
+            A bit subtle:
+               ROL/ROR rearrange the tag bits as per the value bits.
+               SHL/SHR shifts zeroes into the value, and corresponding 
+                  zeroes indicating Definedness into the tag.
+               SAR copies the top bit of the value downwards, and therefore
+                  SAR also copies the definedness of the top bit too.
+            So in all five cases, we just apply the same op to the tag 
+            bits as is applied to the value bits.  Neat!
+         */
+         case SHL:
+         case SHR: case SAR:
+         case ROL: case ROR: {
+            Int t_amount = INVALID_TEMPREG;
+            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
+            vg_assert(u_in->tag2 == TempReg);
+            qd = SHADOW(u_in->val2);
+
+            /* Make qs hold shift-count# and make
+               t_amount be a TempReg holding the shift count. */
+            if (u_in->tag1 == Literal) {
+               t_amount = newTemp(cb);
+               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
+               uLiteral(cb, u_in->lit32);
+               qs = SHADOW(t_amount);
+               uInstr1(cb, SETV, 1, TempReg, qs);
+            } else {
+               t_amount = u_in->val1;
+               qs = SHADOW(u_in->val1);
+            }
+
+            uInstr2(cb, u_in->opcode, 
+                        u_in->size, 
+                        TempReg, t_amount, 
+                        TempReg, qd);
+            qt = newShadow(cb);
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
+            create_PCast(cb, 1, u_in->size, qt);
+            create_UifU(cb, u_in->size, qt, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* One simple tag operation. */
+         case WIDEN:
+            vg_assert(u_in->tag1 == TempReg);
+            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, 
+                             SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* not#(x) = x (since bitwise independent) */
+         case NOT:
+            vg_assert(u_in->tag1 == TempReg);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* neg#(x) = left(x) (derivable from case for SUB) */
+         case NEG:
+            vg_assert(u_in->tag1 == TempReg);
+            create_Left(cb, u_in->size, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* bswap#(x) = bswap(x) */
+         case BSWAP:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->size == 4);
+            qd = SHADOW(u_in->val1);
+            uInstr1(cb, BSWAP, 4, TempReg, qd);
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
+         case CC2VAL:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->flags_r != FlagsEmpty);
+            qt = create_GETVF(cb, u_in->size);
+            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* cmov#(qs,qd) = cmov(qs,qd)
+            That is, do the cmov of tags using the same flags as for
+            the data (obviously).  However, first do a test on the 
+            validity of the flags.
+         */
+         case CMOV:
+            vg_assert(u_in->size == 4);
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->tag2 == TempReg);
+            vg_assert(u_in->flags_r != FlagsEmpty);
+            vg_assert(u_in->flags_w == FlagsEmpty);
+            qs = SHADOW(u_in->val1);
+            qd = SHADOW(u_in->val2);
+            qt = create_GETVF(cb, 0);
+            uInstr1(cb, TESTV, 0, TempReg, qt);
+            /* qt should never be referred to again.  Nevertheless
+               ... */
+            uInstr1(cb, SETV, 0, TempReg, qt);
+
+            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
+            LAST_UINSTR(cb).cond    = u_in->cond;
+            LAST_UINSTR(cb).flags_r = u_in->flags_r;
+
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* add#/sub#(qs,qd) 
+               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
+               = left(qs) `UifU` left(qd)
+               = left(qs `UifU` qd)
+            adc#/sbb#(qs,qd)
+               = left(qs `UifU` qd) `UifU` pcast(eflags#)
+            Second arg (dest) is TempReg.
+            First arg (src) is Literal or TempReg or ArchReg. 
+         */
+         case ADD: case SUB:
+         case ADC: case SBB:
+            qd = SHADOW(u_in->val2);
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            create_UifU(cb, u_in->size, qs, qd);
+            create_Left(cb, u_in->size, qd);
+            if (u_in->opcode == ADC || u_in->opcode == SBB) {
+               vg_assert(u_in->flags_r != FlagsEmpty);
+               qt = create_GETVF(cb, u_in->size);
+               create_UifU(cb, u_in->size, qt, qd);
+            }
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* xor#(qs,qd) = qs `UifU` qd */
+         case XOR:
+            qd = SHADOW(u_in->val2);
+            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
+            create_UifU(cb, u_in->size, qs, qd);
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* and#/or#(qs,qd) 
+               = (qs `UifU` qd) `DifD` improve(vs,qs) 
+                                `DifD` improve(vd,qd)
+            where improve is the relevant one of
+                Improve{AND,OR}_TQ
+            Use the following steps, with qt as a temp:
+               qt = improve(vd,qd)
+               qd = qs `UifU` qd
+               qd = qt `DifD` qd
+               qt = improve(vs,qs)
+               qd = qt `DifD` qd
+         */
+         case AND: case OR:
+            vg_assert(u_in->tag1 == TempReg);
+            vg_assert(u_in->tag2 == TempReg);
+            qd = SHADOW(u_in->val2);
+            qs = SHADOW(u_in->val1);
+            qt = newShadow(cb);
+
+            /* qt = improve(vd,qd) */
+            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
+            if (u_in->opcode == AND)
+               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
+            else
+               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
+            /* qd = qs `UifU` qd */
+            create_UifU(cb, u_in->size, qs, qd);
+            /* qd = qt `DifD` qd */
+            create_DifD(cb, u_in->size, qt, qd);
+            /* qt = improve(vs,qs) */
+            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
+            if (u_in->opcode == AND)
+               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
+            else
+               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
+            /* qd = qt `DifD` qd */
+               create_DifD(cb, u_in->size, qt, qd);
+            /* So, finally qd is the result tag. */
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, u_in->size, qd);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Machinery to do with supporting CALLM.  Copy the start and
+            end markers only to make the result easier to read
+            (debug); they generate no code and have no effect. 
+         */
+         case CALLM_S: case CALLM_E:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Copy PUSH and POP verbatim.  Arg/result absval
+            calculations are done when the associated CALL is
+            processed.  CLEAR has no effect on absval calculations but
+            needs to be copied.  
+         */
+         case PUSH: case POP: case CLEAR:
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* In short:
+               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
+            We have to decide on a size to do the computation at,
+            although the choice doesn't affect correctness.  We will
+            do a pcast to the final size anyway, so the only important
+            factor is to choose a size which minimises the total
+            number of casts needed.  Valgrind: just use size 0,
+            regardless.  It may not be very good for performance
+            but does simplify matters, mainly by reducing the number
+            of different pessimising casts which have to be implemented.
+         */
+         case CALLM: {
+            UInstr* uu;
+            Bool res_used;
+
+            /* Now generate the code.  Get the final result absval
+               into qt. */
+            qt  = newShadow(cb);
+            qtt = newShadow(cb);
+            uInstr1(cb, SETV, 0, TempReg, qt);
+            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
+               uu = & cb_in->instrs[j];
+               if (uu->opcode != PUSH) continue;
+               /* cast via a temporary */
+               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
+                                   TempReg, qtt);
+               create_PCast(cb, uu->size, 0, qtt);
+               create_UifU(cb, 0, qtt, qt);
+            }
+            /* Remembering also that flags read count as inputs. */
+            if (u_in->flags_r != FlagsEmpty) {
+               qtt = create_GETVF(cb, 0);
+               create_UifU(cb, 0, qtt, qt);
+            }
+
+            /* qt now holds the result tag.  If any results from the
+               call are used, either by fetching with POP or
+               implicitly by writing the flags, we copy the result
+               absval to the relevant location.  If not used, the call
+               must have been for its side effects, so we test qt here
+               and now.  Note that this assumes that all values
+               removed by POP continue to be live.  So dead args
+               *must* be removed with CLEAR, not by POPping them into
+               a dummy tempreg. 
+            */
+            res_used = False;
+            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
+               uu = & cb_in->instrs[j];
+               if (uu->opcode != POP) continue;
+               /* Cast via a temp. */
+               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
+               create_PCast(cb, 0, uu->size, qtt);
+               uInstr2(cb, MOV, 4, TempReg, qtt, 
+                                   TempReg, SHADOW(uu->val1));
+               res_used = True;
+            }
+            if (u_in->flags_w != FlagsEmpty) {
+               create_PUTVF(cb, 0, qt);
+               res_used = True;
+            }
+            if (!res_used) {
+               uInstr1(cb, TESTV, 0, TempReg, qt);
+               /* qt should never be referred to again.  Nevertheless
+                  ... */
+               uInstr1(cb, SETV, 0, TempReg, qt);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+         /* Whew ... */
+
+         case JMP:
+            if (u_in->tag1 == TempReg) {
+               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            } else {
+               vg_assert(u_in->tag1 == Literal);
+            }
+            if (u_in->cond != CondAlways) {
+               vg_assert(u_in->flags_r != FlagsEmpty);
+               qt = create_GETVF(cb, 0);
+               uInstr1(cb, TESTV, 0, TempReg, qt);
+               /* qt should never be referred to again.  Nevertheless
+                  ... */
+               uInstr1(cb, SETV, 0, TempReg, qt);
+            }
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         case JIFZ:
+            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
+            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         /* Emit a check on the address used.  The value loaded into the 
+            FPU is checked by the call to fpu_{read/write}_check().  */
+         case FPU_R: case FPU_W: {
+            Int t_size = INVALID_TEMPREG;
+
+            vg_assert(u_in->tag2 == TempReg);
+            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
+            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
+
+            t_size = newTemp(cb);
+            uInstr2(cb, MOV,   4, Literal, 0, TempReg, t_size);
+            uLiteral(cb, u_in->size);
+            uInstr2(cb, CCALL, 0, TempReg, u_in->val2, TempReg, t_size);
+            uCCall(cb, 
+                   u_in->opcode==FPU_R ? (Addr) & SK_(fpu_read_check) 
+                                       : (Addr) & SK_(fpu_write_check),
+                   2, 2, False);
+
+            VG_(copyUInstr)(cb, u_in);
+            break;
+         }
+
+         /* For FPU insns not referencing memory, just copy thru. */
+         case FPU: 
+            VG_(copyUInstr)(cb, u_in);
+            break;
+
+         default:
+            VG_(ppUInstr)(0, u_in);
+            VG_(panic)( "memcheck_instrument: unhandled case");
+
+      } /* end of switch (u_in->opcode) */
+
+   } /* end of for loop */
+
+   VG_(freeCodeBlock)(cb_in);
+   return cb;
+}
+
+/*------------------------------------------------------------*/
+/*--- Clean up mem check instrumentation.                  ---*/
+/*------------------------------------------------------------*/
+
+/* When True, print codegen details for the memcheck cleanup passes.
+   NOTE(review): the clo_ prefix suggests a command-line option --
+   confirm against the option-parsing code (not visible here). */
+Bool VG_(clo_memcheck_codegen) = False;
+
+/* Local shorthand used by the cleanup passes below (#undef'd at end
+   of file). */
+#define dis    VG_(print_codegen)
+
+
+/* Odd-numbered TempRegs are treated as shadow (tag) temps by the
+   cleanup passes. */
+#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
+/* Markers for the def[] map in vg_propagate_definedness: any value
+   <= 4 means "definitely fully defined at that size"; VGC_UNDEF means
+   possibly undefined; VGC_VALUE marks a non-shadow (value) temp. */
+#define VGC_UNDEF ((UChar)100)
+#define VGC_VALUE ((UChar)101)
+
+/* Annul uinstr uu without printing anything. */
+#define NOP_no_msg(uu)                                            \
+   do { VG_(newNOP)(uu); } while (False)
+
+/* Annul uinstr uu and report it when dis is set.
+   NOTE(review): the message body references the caller's `u' and `i'
+   rather than the macro argument `uu'; all current call sites pass u,
+   so this works, but the macro is not hygienic. */
+#define NOP_tag1_op(uu)                                           \
+   do { VG_(newNOP)(uu);                                          \
+        if (dis)                                                  \
+           VG_(printf)("   at %2d: delete %s due to defd arg\n",  \
+                       i, nameOfTagOp(u->val3));                  \
+   } while (False)
+
+/* Rewrite uinstr uu in place into a SETV of size newsz and report it.
+   Same hygiene caveat as NOP_tag1_op: relies on caller's `u'/`i'. */
+#define SETV_tag1_op(uu,newsz)                                    \
+   do { uu->opcode = SETV;                                        \
+        uu->size = newsz;                                         \
+        uu->tag2 = uu->tag3 = NoValue;                            \
+        if (dis)                                                  \
+           VG_(printf)("   at %2d: convert %s to SETV%d "         \
+                       "due to defd arg\n",                       \
+                       i, nameOfTagOp(u->val3), newsz);           \
+   } while (False)
+
+
+
+/* Run backwards and delete SETVs on shadow temps for which the next
+   action is a write.  Needs an env saying whether or not the next
+   action is a write.  The supplied UCodeBlock is destructively
+   modified.
+*/
+static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
+{
+   Int     i, j, k;
+   Int     n_temps = cb->nextTemp;
+   Bool*   next_is_write;   /* next_is_write[t]: scanning backwards,
+                               is the next action on temp t a write? */
+   UInstr* u;
+   RegUse  tempUse[3];
+
+   if (n_temps == 0) return;
+
+   next_is_write = VG_(malloc)(n_temps * sizeof(Bool));
+
+   /* Every temp is dead at the end of the block, so "next action is a
+      write" holds vacuously for all of them to begin with. */
+   for (i = 0; i < n_temps; i++) next_is_write[i] = True;
+
+   for (i = cb->used-1; i >= 0; i--) {
+      u = &cb->instrs[i];
+
+      /* If we're not checking address V bits, there will be a lot of
+         GETVs, TAG1s and TAG2s calculating values which are never
+         used.  These first three cases get rid of them. */
+
+      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) 
+                            && next_is_write[u->val2]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete GETV\n", i);
+      } else
+
+      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) 
+                            && next_is_write[u->val1]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete TAG1\n", i);
+      } else
+
+      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) 
+                            && next_is_write[u->val2]
+                            && !SK_(clo_check_addrVs)) {
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete TAG2\n", i);
+      } else
+
+      /* We do the rest of these regardless of whether or not
+         addresses are V-checked. */
+
+      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) 
+                           && next_is_write[u->val2]) {
+         /* This MOV is pointless because the target is dead at this
+            point.  Delete it. */
+         VG_(newNOP)(u);
+         if (dis) 
+            VG_(printf)("   at %2d: delete MOV\n", i);
+      } else
+
+      if (u->opcode == SETV) {
+         if (u->tag1 == TempReg) {
+            vg_assert(VGC_IS_SHADOW(u->val1));
+            if (next_is_write[u->val1]) {
+               /* This write is pointless, so annul it. */
+               VG_(newNOP)(u);
+               if (dis) 
+                  VG_(printf)("   at %2d: delete SETV\n", i);
+            } else {
+               /* This write has a purpose; don't annul it, but do
+                  notice that we did it. */
+               next_is_write[u->val1] = True;
+            }
+         }
+
+      } else {
+         /* Find out what this insn does to the temps. */
+         k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
+         vg_assert(k <= 3);
+         for (j = k-1; j >= 0; j--) {
+            next_is_write[ tempUse[j].num ]
+                         = tempUse[j].isWrite;
+         }
+      }
+   }
+
+   /* Fix: release the liveness map.  It was previously leaked on
+      every invocation (i.e. once per translated basic block). */
+   VG_(free)(next_is_write);
+}
+
+
+/* Run forwards, propagating and using the is-completely-defined
+   property.  This removes a lot of redundant tag-munging code.
+   Unfortunately it requires intimate knowledge of how each uinstr and
+   tagop modifies its arguments.  This duplicates knowledge of uinstr
+   tempreg uses embodied in VG_(getRegUsage)(), which is unfortunate.
+   The supplied UCodeBlock* is modified in-place.
+
+   For each value temp, def[] should hold VGC_VALUE.
+
+   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
+   definitely known to be fully defined at that size.  In all other
+   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
+   undefined.  In cases of doubt, VGC_UNDEF is always safe.
+*/
+static void vg_propagate_definedness ( UCodeBlock* cb )
+{
+   Int     i, j, k, t;
+   Int     n_temps = cb->nextTemp;
+   UChar*  def;      /* per-temp definedness map; see header comment */
+   UInstr* u;
+   RegUse  tempUse[3];
+
+   if (n_temps == 0) return;
+
+   def = VG_(malloc)(n_temps * sizeof(UChar));
+
+   /* Shadows start out possibly-undefined; value temps are marked
+      VGC_VALUE so they never match the "def <= 4" tests below. */
+   for (i = 0; i < n_temps; i++) 
+      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
+
+   /* Run forwards, detecting and using the all-defined property. */
+
+   for (i = 0; i < cb->used; i++) {
+      u = &cb->instrs[i];
+      switch (u->opcode) {
+
+      /* Tag-handling uinstrs. */
+
+         /* Deal with these quickly. */
+         case NOP:
+         case INCEIP:
+            break;
+
+         /* Make a tag defined. */
+         case SETV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            def[u->val1] = u->size;
+            break;
+
+         /* Check definedness of a tag.  If we can prove the arg is
+            fully defined the check is a no-op and can be deleted. */
+         case TESTV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) { 
+               vg_assert(def[u->val1] == u->size); 
+               NOP_no_msg(u);
+               if (dis) 
+                  VG_(printf)("   at %2d: delete TESTV on defd arg\n", i);
+            }
+            break;
+
+         /* Applies to both values and tags.  Propagate Definedness
+            property through copies.  Note that this isn't optional;
+            we *have* to do this to keep def[] correct. */
+         case MOV:
+            vg_assert(u->tag2 == TempReg);
+            if (u->tag1 == TempReg) {
+               if (VGC_IS_SHADOW(u->val1)) {
+                  vg_assert(VGC_IS_SHADOW(u->val2));
+                  def[u->val2] = def[u->val1];
+               }
+            }
+            break;
+
+         /* The source tag is provably all-defined: replace it with
+            the literal shadow pattern for a defined value at this
+            size, saving the tag computation at run time. */
+         case PUTV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) {
+               vg_assert(def[u->val1] == u->size);
+               u->tag1 = Literal;
+               u->val1 = 0;
+               switch (u->size) {
+                  case 4: u->lit32 = 0x00000000; break;
+                  case 2: u->lit32 = 0xFFFF0000; break;
+                  case 1: u->lit32 = 0xFFFFFF00; break;
+                  default: VG_(panic)("vg_cleanup(PUTV)");
+               }
+               if (dis) 
+                  VG_(printf)(
+                     "   at %2d: propagate definedness into PUTV\n", i);
+            }
+            break;
+
+         /* Same trick as PUTV, for stores of shadow values. */
+         case STOREV:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] <= 4) {
+               vg_assert(def[u->val1] == u->size);
+               u->tag1 = Literal;
+               u->val1 = 0;
+               switch (u->size) {
+                  case 4: u->lit32 = 0x00000000; break;
+                  case 2: u->lit32 = 0xFFFF0000; break;
+                  case 1: u->lit32 = 0xFFFFFF00; break;
+                  default: VG_(panic)("vg_cleanup(STOREV)");
+               }
+               if (dis) 
+                  /* Fix: message previously said "STandV"; name the
+                     opcode consistently with the other messages. */
+                  VG_(printf)(
+                     "   at %2d: propagate definedness into STOREV\n", i);
+            }
+            break;
+
+         /* Nothing interesting we can do with this, I think. */
+         case PUTVF:
+            break;
+
+         /* Tag handling operations. */
+         case TAG2:
+            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
+            vg_assert(u->tag3 == Lit16);
+            /* Ultra-paranoid "type" checking: the Improve*_TQ ops take
+               a value temp as arg1; all other tagops take a shadow. */
+            switch (u->val3) {
+               case Tag_ImproveAND4_TQ: case Tag_ImproveAND2_TQ:
+               case Tag_ImproveAND1_TQ: case Tag_ImproveOR4_TQ:
+               case Tag_ImproveOR2_TQ: case Tag_ImproveOR1_TQ:
+                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
+                  break;
+               default:
+                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+                  break;
+            }
+            switch (u->val3) {
+               Int sz;
+               case Tag_UifU4: 
+                  sz = 4; goto do_UifU;
+               case Tag_UifU2: 
+                  sz = 2; goto do_UifU;
+               case Tag_UifU1:
+                  sz = 1; goto do_UifU;
+               case Tag_UifU0:
+                  sz = 0; goto do_UifU;
+               do_UifU:
+                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
+                  if (def[u->val1] <= 4) {
+                     /* UifU.  The first arg is defined, so result is
+                        simply second arg.  Delete this operation. */
+                     vg_assert(def[u->val1] == sz);
+                     NOP_no_msg(u);
+                     if (dis) 
+                        VG_(printf)(
+                           "   at %2d: delete UifU%d due to defd arg1\n", 
+                           i, sz);
+                  }
+                  else 
+                  if (def[u->val2] <= 4) {
+                     /* UifU.  The second arg is defined, so result is
+                        simply first arg.  Copy to second. */
+                     vg_assert(def[u->val2] == sz);
+                     u->opcode = MOV; 
+                     u->size = 4;
+                     u->tag3 = NoValue;
+                     def[u->val2] = def[u->val1];
+                     if (dis) 
+                        VG_(printf)(
+                           "   at %2d: change UifU%d to MOV due to defd"
+                           " arg2\n", 
+                           i, sz);
+                  }
+                  break;
+               case Tag_ImproveAND4_TQ:
+                  sz = 4; goto do_ImproveAND;
+               case Tag_ImproveAND1_TQ:
+                  sz = 1; goto do_ImproveAND;
+               do_ImproveAND:
+                  /* Implements Q = T OR Q.  So if Q is entirely defined,
+                     ie all 0s, we get MOV T, Q. */
+                  if (def[u->val2] <= 4) {
+                     vg_assert(def[u->val2] == sz);
+                     u->size = 4; /* Regardless of sz */
+                     u->opcode = MOV;
+                     u->tag3 = NoValue;
+                     /* Result now mirrors a value temp: conservatively
+                        mark it possibly-undefined. */
+                     def[u->val2] = VGC_UNDEF;
+                     if (dis) 
+                        VG_(printf)(
+                            "   at %2d: change ImproveAND%d_TQ to MOV due "
+                            "to defd arg2\n", 
+                            i, sz);
+                  }
+                  break;
+               default: 
+                  goto unhandled;
+            }
+            break;
+
+         case TAG1:
+            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
+            if (def[u->val1] > 4) break;
+            /* We now know that the arg to the op is entirely defined.
+               If the op changes the size of the arg, we must replace
+               it with a SETV at the new size.  If it doesn't change
+               the size, we can delete it completely. */
+            switch (u->val3) {
+               /* Maintain the same size ... */
+               case Tag_Left4: 
+                  vg_assert(def[u->val1] == 4);
+                  NOP_tag1_op(u);
+                  break;
+               case Tag_PCast11: 
+                  vg_assert(def[u->val1] == 1);
+                  NOP_tag1_op(u);
+                  break;
+               /* Change size ... */
+               case Tag_PCast40: 
+                  vg_assert(def[u->val1] == 4);
+                  SETV_tag1_op(u,0);
+                  def[u->val1] = 0;
+                  break;
+               case Tag_PCast14: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,4);
+                  def[u->val1] = 4;
+                  break;
+               case Tag_PCast12: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,2);
+                  def[u->val1] = 2;
+                  break;
+               case Tag_PCast10: 
+                  vg_assert(def[u->val1] == 1);
+                  SETV_tag1_op(u,0);
+                  def[u->val1] = 0;
+                  break;
+               case Tag_PCast02: 
+                  vg_assert(def[u->val1] == 0);
+                  SETV_tag1_op(u,2);
+                  def[u->val1] = 2;
+                  break;
+               default: 
+                  goto unhandled;
+            }
+            /* NOTE(review): NOP_tag1_op/SETV_tag1_op above already
+               printed a message, so this emits a second (and for the
+               SETV conversions, misleading) "delete" line.  Left
+               unchanged to preserve output; confirm intent. */
+            if (dis) 
+               VG_(printf)(
+                  "   at %2d: delete TAG1 %s due to defd arg\n",
+                  i, nameOfTagOp(u->val3));
+            break;
+
+         default:
+         unhandled:
+            /* We don't know how to handle this uinstr.  Be safe, and 
+               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
+            k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
+            vg_assert(k <= 3);
+            for (j = 0; j < k; j++) {
+               t = tempUse[j].num;
+               vg_assert(t >= 0 && t < n_temps);
+               if (!tempUse[j].isWrite) {
+                  /* t is read; ignore it.  (The print below is a
+                     deliberately disabled debug aid.) */
+                  if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
+                     VG_(printf)("ignoring def %d at %s %s\n", 
+                                 def[t], 
+                                 VG_(nameUOpcode)(True, u->opcode),
+                                 (u->opcode == TAG1 || u->opcode == TAG2)
+                                    ? nameOfTagOp(u->val3) 
+                                    : (Char*)"");
+               } else {
+                  /* t is written; better nullify it. */
+                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
+               }
+            }
+      }
+   }
+
+   /* Fix: release the definedness map.  It was previously leaked on
+      every invocation (i.e. once per translated basic block). */
+   VG_(free)(def);
+}
+
+
+/* Top-level post-MemCheck-instrumentation cleanup: run both
+   improvement passes over the block, in place. */
+static void vg_cleanup ( UCodeBlock* cb )
+{
+   /* Forward pass: exploit provable definedness to simplify or
+      annul tag operations. */
+   vg_propagate_definedness(cb);
+   /* Backward pass: remove SETVs and other writes to shadow temps
+      whose results are never subsequently read. */
+   vg_delete_redundant_SETVs(cb);
+}
+
+
+/* Caller will print out final instrumented code if necessary;  we
+   print out intermediate instrumented code here if necessary. */
+UCodeBlock* SK_(instrument) ( UCodeBlock* cb, Addr not_used )
+{
+   /* Always instrument; cleanup is optional. */
+   cb = memcheck_instrument(cb);
+   if (!SK_(clo_cleanup))
+      return cb;
+
+   if (dis) {
+      VG_(ppUCodeBlock) ( cb, "Unimproved instrumented UCode:" );
+      VG_(printf)("Instrumentation improvements:\n");
+   }
+   vg_cleanup(cb);
+   if (dis)
+      VG_(printf)("\n");
+
+   return cb;
+}
+
+#undef dis
+
+/*--------------------------------------------------------------------*/
+/*--- end                                  vg_memcheck_translate.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_memory.c b/vg_memory.c
index eea79cb..5ea4246 100644
--- a/vg_memory.c
+++ b/vg_memory.c
@@ -1,7 +1,7 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Maintain bitmaps of memory, tracking the accessibility (A)   ---*/
-/*--- and validity (V) status of each byte.                        ---*/
+/*--- Memory-related stuff: segment initialisation and tracking,   ---*/
+/*--- stack operations                                             ---*/
 /*---                                                  vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -27,1275 +27,208 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-/* Define to debug the mem audit system. */
-/* #define VG_DEBUG_MEMORY */
 
-/* Define to debug the memory-leak-detector. */
-/* #define VG_DEBUG_LEAKCHECK */
+/*--------------------------------------------------------------*/
+/*--- Initialise program data/text etc on program startup.   ---*/
+/*--------------------------------------------------------------*/
 
-/* Define to collect detailed performance info. */
-/* #define VG_PROFILE_MEMORY */
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level support for memory checking.               ---*/
-/*------------------------------------------------------------*/
-
-/* 
-   All reads and writes are checked against a memory map, which
-   records the state of all memory in the process.  The memory map is
-   organised like this:
-
-   The top 16 bits of an address are used to index into a top-level
-   map table, containing 65536 entries.  Each entry is a pointer to a
-   second-level map, which records the accesibililty and validity
-   permissions for the 65536 bytes indexed by the lower 16 bits of the
-   address.  Each byte is represented by nine bits, one indicating
-   accessibility, the other eight validity.  So each second-level map
-   contains 73728 bytes.  This two-level arrangement conveniently
-   divides the 4G address space into 64k lumps, each size 64k bytes.
-
-   All entries in the primary (top-level) map must point to a valid
-   secondary (second-level) map.  Since most of the 4G of address
-   space will not be in use -- ie, not mapped at all -- there is a
-   distinguished secondary map, which indicates `not addressible and
-   not valid' writeable for all bytes.  Entries in the primary map for
-   which the entire 64k is not in use at all point at this
-   distinguished map.
-
-   [...] lots of stuff deleted due to out of date-ness
-
-   As a final optimisation, the alignment and address checks for
-   4-byte loads and stores are combined in a neat way.  The primary
-   map is extended to have 262144 entries (2^18), rather than 2^16.
-   The top 3/4 of these entries are permanently set to the
-   distinguished secondary map.  For a 4-byte load/store, the
-   top-level map is indexed not with (addr >> 16) but instead f(addr),
-   where
-
-    f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ )
-        = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX  or 
-        = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX
-
-   ie the lowest two bits are placed above the 16 high address bits.
-   If either of these two bits are nonzero, the address is misaligned;
-   this will select a secondary map from the upper 3/4 of the primary
-   map.  Because this is always the distinguished secondary map, a
-   (bogus) address check failure will result.  The failure handling
-   code can then figure out whether this is a genuine addr check
-   failure or whether it is a possibly-legitimate access at a
-   misaligned address.  
-*/
-
-
-/*------------------------------------------------------------*/
-/*--- Crude profiling machinery.                           ---*/
-/*------------------------------------------------------------*/
-
-#ifdef VG_PROFILE_MEMORY
-
-#define N_PROF_EVENTS 150
-
-static UInt event_ctr[N_PROF_EVENTS];
-
-static void init_prof_mem ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++)
-      event_ctr[i] = 0;
-}
-
-void VG_(done_prof_mem) ( void )
-{
-   Int i;
-   for (i = 0; i < N_PROF_EVENTS; i++) {
-      if ((i % 10) == 0) 
-         VG_(printf)("\n");
-      if (event_ctr[i] > 0)
-         VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] );
+typedef
+   struct _ExeSeg {
+      Addr start;
+      UInt size;
+      struct _ExeSeg* next;
    }
-   VG_(printf)("\n");
-}
+   ExeSeg;
 
-#define PROF_EVENT(ev)                                  \
-   do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);   \
-        event_ctr[ev]++;                                \
-   } while (False);
+/* The list of current executable segments loaded.  Required so that when a
+   segment is munmap'd, if it's executable we can recognise it as such and
+   invalidate translations for it, and drop any basic-block specific
+   information being stored.  If symbols are being used, this list will have
+   the same segments recorded in it as the SegInfo symbols list (but much
+   less information about each segment).
+*/
+static ExeSeg* exeSegsHead = NULL;
 
-#else
-
-static void init_prof_mem ( void ) { }
-       void VG_(done_prof_mem) ( void ) { }
-
-#define PROF_EVENT(ev) /* */
-
-#endif
-
-/* Event index.  If just the name of the fn is given, this means the
-   number of calls to the fn.  Otherwise it is the specified event.
-
-   10   alloc_secondary_map
-
-   20   get_abit
-   21   get_vbyte
-   22   set_abit
-   23   set_vbyte
-   24   get_abits4_ALIGNED
-   25   get_vbytes4_ALIGNED
-
-   30   set_address_range_perms
-   31   set_address_range_perms(lower byte loop)
-   32   set_address_range_perms(quadword loop)
-   33   set_address_range_perms(upper byte loop)
+/* Prepend it -- mmaps/munmaps likely to follow a stack pattern(?) so this
+   is good.
+   Also check no segments overlap, which would be very bad.  Check is linear
+   for each seg added (quadratic overall) but the total number should be
+   small (konqueror has around 50 --njn). */
+static void add_exe_segment_to_list( a, len ) 
+{
+   Addr lo = a;
+   Addr hi = a + len - 1;
+   ExeSeg* es;
+   ExeSeg* es2;
    
-   35   make_noaccess
-   36   make_writable
-   37   make_readable
+   /* Prepend it */
+   es        = (ExeSeg*)VG_(arena_malloc)(VG_AR_CORE, sizeof(ExeSeg));
+   es->start = a;
+   es->size  = len;
+   es->next  = exeSegsHead;
+   exeSegsHead = es;
 
-   40   copy_address_range_perms
-   41   copy_address_range_perms(byte loop)
-   42   check_writable
-   43   check_writable(byte loop)
-   44   check_readable
-   45   check_readable(byte loop)
-   46   check_readable_asciiz
-   47   check_readable_asciiz(byte loop)
-
-   50   make_aligned_word_NOACCESS
-   51   make_aligned_word_WRITABLE
-
-   60   helperc_LOADV4
-   61   helperc_STOREV4
-   62   helperc_LOADV2
-   63   helperc_STOREV2
-   64   helperc_LOADV1
-   65   helperc_STOREV1
-
-   70   rim_rd_V4_SLOWLY
-   71   rim_wr_V4_SLOWLY
-   72   rim_rd_V2_SLOWLY
-   73   rim_wr_V2_SLOWLY
-   74   rim_rd_V1_SLOWLY
-   75   rim_wr_V1_SLOWLY
-
-   80   fpu_read
-   81   fpu_read aligned 4
-   82   fpu_read aligned 8
-   83   fpu_read 2
-   84   fpu_read 10
-
-   85   fpu_write
-   86   fpu_write aligned 4
-   87   fpu_write aligned 8
-   88   fpu_write 2
-   89   fpu_write 10
-
-   90   fpu_read_check_SLOWLY
-   91   fpu_read_check_SLOWLY(byte loop)
-   92   fpu_write_check_SLOWLY
-   93   fpu_write_check_SLOWLY(byte loop)
-
-   100  is_plausible_stack_addr
-   101  handle_esp_assignment
-   102  handle_esp_assignment(-4)
-   103  handle_esp_assignment(+4)
-   104  handle_esp_assignment(-12)
-   105  handle_esp_assignment(-8)
-   106  handle_esp_assignment(+16)
-   107  handle_esp_assignment(+12)
-   108  handle_esp_assignment(0)
-   109  handle_esp_assignment(+8)
-   110  handle_esp_assignment(-16)
-   111  handle_esp_assignment(+20)
-   112  handle_esp_assignment(-20)
-   113  handle_esp_assignment(+24)
-   114  handle_esp_assignment(-24)
-
-   120  vg_handle_esp_assignment_SLOWLY
-   121  vg_handle_esp_assignment_SLOWLY(normal; move down)
-   122  vg_handle_esp_assignment_SLOWLY(normal; move up)
-   123  vg_handle_esp_assignment_SLOWLY(normal)
-   124  vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA)
-*/
-
-/*------------------------------------------------------------*/
-/*--- Function declarations.                               ---*/
-/*------------------------------------------------------------*/
-
-/* Set permissions for an address range.  Not speed-critical. */
-void VGM_(make_noaccess) ( Addr a, UInt len );
-void VGM_(make_writable) ( Addr a, UInt len );
-void VGM_(make_readable) ( Addr a, UInt len );
-
-/* Check permissions for an address range.  Not speed-critical. */
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr );
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr );
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a );
-static UInt vgm_rd_V2_SLOWLY ( Addr a );
-static UInt vgm_rd_V1_SLOWLY ( Addr a );
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes );
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes );
-static void fpu_read_check_SLOWLY ( Addr addr, Int size );
-static void fpu_write_check_SLOWLY ( Addr addr, Int size );
-
-
-/*------------------------------------------------------------*/
-/*--- Data defns.                                          ---*/
-/*------------------------------------------------------------*/
-
-typedef 
-   struct {
-      UChar abits[8192];
-      UChar vbyte[65536];
-   }
-   SecMap;
-
-/* These two are statically allocated.  Should they be non-public? */
-SecMap* VG_(primary_map)[ /*65536*/ 262144 ];
-static SecMap  vg_distinguished_secondary_map;
-
-#define IS_DISTINGUISHED_SM(smap) \
-   ((smap) == &vg_distinguished_secondary_map)
-
-#define ENSURE_MAPPABLE(addr,caller)                                   \
-   do {                                                                \
-      if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) {       \
-         VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \
-         /* VG_(printf)("new 2map because of %p\n", addr);   */       \
-      }                                                                \
-   } while(0)
-
-#define BITARR_SET(aaa_p,iii_p)                         \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] |= (1 << (iii & 7));                \
-   } while (0)
-
-#define BITARR_CLEAR(aaa_p,iii_p)                       \
-   do {                                                 \
-      UInt   iii = (UInt)iii_p;                         \
-      UChar* aaa = (UChar*)aaa_p;                       \
-      aaa[iii >> 3] &= ~(1 << (iii & 7));               \
-   } while (0)
-
-#define BITARR_TEST(aaa_p,iii_p)                        \
-      (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ]      \
-               & (1 << (((UInt)iii_p) & 7))))           \
-
-
-#define VGM_BIT_VALID      0
-#define VGM_BIT_INVALID    1
-
-#define VGM_NIBBLE_VALID   0
-#define VGM_NIBBLE_INVALID 0xF
-
-#define VGM_BYTE_VALID     0
-#define VGM_BYTE_INVALID   0xFF
-
-/* Now in vg_include.h.
-#define VGM_WORD_VALID     0
-#define VGM_WORD_INVALID   0xFFFFFFFF
-*/
-
-#define VGM_EFLAGS_VALID   0xFFFFFFFE
-#define VGM_EFLAGS_INVALID 0xFFFFFFFF
-
-
-#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
-
-
-/*------------------------------------------------------------*/
-/*--- Basic bitmap management, reading and writing.        ---*/
-/*------------------------------------------------------------*/
-
-/* Allocate and initialise a secondary map. */
-
-static SecMap* alloc_secondary_map ( __attribute__ ((unused)) 
-                                     Char* caller )
-{
-   SecMap* map;
-   UInt  i;
-   PROF_EVENT(10);
-
-   /* Mark all bytes as invalid access and invalid value. */
-
-   /* It just happens that a SecMap occupies exactly 18 pages --
-      although this isn't important, so the following assert is
-      spurious. */
-   vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
-
-   for (i = 0; i < 8192; i++)
-      map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */
-   return map;
-}
-
-
-/* Basic reading/writing of the bitmaps, for byte-sized accesses. */
-
-static __inline__ UChar get_abit ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(20);
-   return BITARR_TEST(sm->abits, sm_off) 
-             ? VGM_BIT_INVALID : VGM_BIT_VALID;
-}
-
-static __inline__ UChar get_vbyte ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(21);
-   return sm->vbyte[sm_off];
-}
-
-static __inline__ void set_abit ( Addr a, UChar abit )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(22);
-   ENSURE_MAPPABLE(a, "set_abit");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   if (abit) 
-      BITARR_SET(sm->abits, sm_off);
-   else
-      BITARR_CLEAR(sm->abits, sm_off);
-}
-
-static __inline__ void set_vbyte ( Addr a, UChar vbyte )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   PROF_EVENT(23);
-   ENSURE_MAPPABLE(a, "set_vbyte");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   sm->vbyte[sm_off] = vbyte;
-}
-
-
-/* Reading/writing of the bitmaps, for aligned word-sized accesses. */
-
-static __inline__ UChar get_abits4_ALIGNED ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   abits8;
-   PROF_EVENT(24);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   abits8 = sm->abits[sm_off >> 3];
-   abits8 >>= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   abits8 &= 0x0F;
-   return abits8;
-}
-
-static UInt __inline__ get_vbytes4_ALIGNED ( Addr a )
-{
-   SecMap* sm     = VG_(primary_map)[a >> 16];
-   UInt    sm_off = a & 0xFFFF;
-   PROF_EVENT(25);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   return ((UInt*)(sm->vbyte))[sm_off >> 2];
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Setting permissions over address ranges.             ---*/
-/*------------------------------------------------------------*/
-
-static void set_address_range_perms ( Addr a, UInt len, 
-                                      UInt example_a_bit,
-                                      UInt example_v_bit )
-{
-   UChar   vbyte, abyte8;
-   UInt    vword4, sm_off;
-   SecMap* sm;
-
-   PROF_EVENT(30);
-
-   if (len == 0)
-      return;
-
-   if (len > 100 * 1000 * 1000) 
-      VG_(message)(Vg_UserMsg, 
-                   "Warning: set address range perms: "
-                   "large range %d, a %d, v %d",
-                   len, example_a_bit, example_v_bit );
-
-   VGP_PUSHCC(VgpSARP);
-
-   /* Requests to change permissions of huge address ranges may
-      indicate bugs in our machinery.  30,000,000 is arbitrary, but so
-      far all legitimate requests have fallen beneath that size. */
-   /* 4 Mar 02: this is just stupid; get rid of it. */
-   /* vg_assert(len < 30000000); */
-
-   /* Check the permissions make sense. */
-   vg_assert(example_a_bit == VGM_BIT_VALID 
-             || example_a_bit == VGM_BIT_INVALID);
-   vg_assert(example_v_bit == VGM_BIT_VALID 
-             || example_v_bit == VGM_BIT_INVALID);
-   if (example_a_bit == VGM_BIT_INVALID)
-      vg_assert(example_v_bit == VGM_BIT_INVALID);
-
-   /* The validity bits to write. */
-   vbyte = example_v_bit==VGM_BIT_VALID 
-              ? VGM_BYTE_VALID : VGM_BYTE_INVALID;
-
-   /* In order that we can charge through the address space at 8
-      bytes/main-loop iteration, make up some perms. */
-   abyte8 = (example_a_bit << 7)
-            | (example_a_bit << 6)
-            | (example_a_bit << 5)
-            | (example_a_bit << 4)
-            | (example_a_bit << 3)
-            | (example_a_bit << 2)
-            | (example_a_bit << 1)
-            | (example_a_bit << 0);
-   vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte;
-
-#  ifdef VG_DEBUG_MEMORY
-   /* Do it ... */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }
-
-#  else
-   /* Slowly do parts preceding 8-byte alignment. */
-   while (True) {
-      PROF_EVENT(31);
-      if (len == 0) break;
-      if ((a % 8) == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0);
-
-   /* Once aligned, go fast. */
-   while (True) {
-      PROF_EVENT(32);
-      if (len < 8) break;
-      ENSURE_MAPPABLE(a, "set_address_range_perms(fast)");
-      sm = VG_(primary_map)[a >> 16];
-      sm_off = a & 0xFFFF;
-      sm->abits[sm_off >> 3] = abyte8;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4;
-      ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4;
-      a += 8;
-      len -= 8;
-   }
-
-   if (len == 0) {
-      VGP_POPCC;
-      return;
-   }
-   vg_assert((a % 8) == 0 && len > 0 && len < 8);
-
-   /* Finish the upper fragment. */
-   while (True) {
-      PROF_EVENT(33);
-      if (len == 0) break;
-      set_abit ( a, example_a_bit );
-      set_vbyte ( a, vbyte );
-      a++;
-      len--;
-   }   
-#  endif
-
-   /* Check that zero page and highest page have not been written to
-      -- this could happen with buggy syscall wrappers.  Today
-      (2001-04-26) had precisely such a problem with
-      __NR_setitimer. */
-   vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   VGP_POPCC;
-}
-
-
-/* Set permissions for address ranges ... */
-
-void VGM_(make_noaccess) ( Addr a, UInt len )
-{
-   PROF_EVENT(35);
-   set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_writable) ( Addr a, UInt len )
-{
-   PROF_EVENT(36);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
-}
-
-void VGM_(make_readable) ( Addr a, UInt len )
-{
-   PROF_EVENT(37);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-void VGM_(make_readwritable) ( Addr a, UInt len )
-{
-   PROF_EVENT(38);
-   set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
-}
-
-/* Block-copy permissions (needed for implementing realloc()). */
-
-void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len )
-{
-   UInt i;
-   PROF_EVENT(40);
-   for (i = 0; i < len; i++) {
-      UChar abit  = get_abit ( src+i );
-      UChar vbyte = get_vbyte ( src+i );
-      PROF_EVENT(41);
-      set_abit ( dst+i, abit );
-      set_vbyte ( dst+i, vbyte );
-   }
-}
-
-
-/* Check permissions for address range.  If inadequate permissions
-   exist, *bad_addr is set to the offending address, so the caller can
-   know what it is. */
-
-Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr )
-{
-   UInt  i;
-   UChar abit;
-   PROF_EVENT(42);
-   for (i = 0; i < len; i++) {
-      PROF_EVENT(43);
-      abit = get_abit(a);
-      if (abit == VGM_BIT_INVALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
+   /* Check there's no overlap with the rest of the list */
+   for (es2 = es->next; es2 != NULL; es2 = es2->next) {
+      Addr lo2 = es2->start;
+      Addr hi2 = es2->start + es2->size - 1;
+      Bool overlap;
+      vg_assert(lo < hi);
+      vg_assert(lo2 < hi2);
+      /* the main assertion */
+      overlap = (lo <= lo2 && lo2 <= hi)
+                 || (lo <= hi2 && hi2 <= hi);
+      if (overlap) {
+         VG_(printf)("\n\nOVERLAPPING EXE SEGMENTS\n"
+                     "  new: start %p, size %d\n"
+                     "  old: start %p, size %d\n\n",
+                     es->start, es->size, es2->start, es2->size );
+         vg_assert(! overlap);
       }
-      a++;
    }
+}
+
+static Bool remove_if_exe_segment_from_list( Addr a, UInt len )
+{
+   ExeSeg **prev_next_ptr = & exeSegsHead, 
+          *curr = exeSegsHead;
+
+   while (True) {
+      if (curr == NULL) break;
+      if (a == curr->start) break;
+      prev_next_ptr = &curr->next;
+      curr = curr->next;
+   }
+   if (curr == NULL)
+      return False;
+
+   vg_assert(*prev_next_ptr == curr);
+
+   *prev_next_ptr = curr->next;
+
+   VG_(arena_free)(VG_AR_CORE, curr);
    return True;
 }
 
-Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr )
+/* Records the exe segment in the ExeSeg list (checking for overlaps), and
+   reads debug info if required.  Note the entire /proc/pid/maps file is 
+   read for the debug info, but it just reads symbols for newly added exe
+   segments.  This is required to find out their names if they have one.  So
+   we don't use this at startup because it's overkill and can screw reading
+   of /proc/pid/maps.
+ */
+void VG_(new_exe_segment) ( Addr a, UInt len )
 {
-   UInt  i;
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(44);
-   for (i = 0; i < len; i++) {
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      PROF_EVENT(45);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      a++;
-   }
-   return True;
+   // SSS: only bother if size != 0?  Does that happen? (probably can)
+
+   add_exe_segment_to_list( a, len );
+   VG_(maybe_read_symbols)();
 }
 
-
-/* Check a zero-terminated ascii string.  Tricky -- don't want to
-   examine the actual bytes, to find the end, until we're sure it is
-   safe to do so. */
-
-Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr )
+/* Invalidate translations as necessary (also discarding any basic
+   block-specific info retained by the skin) and unload any debug
+   symbols. */
+// Nb: remove_if_exe_segment_from_list() and VG_(maybe_unload_symbols)()
+// both ignore 'len', but that seems that's ok for most programs...  see
+// comment above vg_syscalls.c:mmap_segment() et al for more details.
+void VG_(remove_if_exe_segment) ( Addr a, UInt len )
 {
-   UChar abit;
-   UChar vbyte;
-   PROF_EVENT(46);
-   while (True) {
-      PROF_EVENT(47);
-      abit  = get_abit(a);
-      vbyte = get_vbyte(a);
-      if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) {
-         if (bad_addr != NULL) *bad_addr = a;
-         return False;
-      }
-      /* Ok, a is safe to read. */
-      if (* ((UChar*)a) == 0) return True;
-      a++;
+   if (remove_if_exe_segment_from_list( a, len )) {
+      VG_(invalidate_translations) ( a, len );
+      VG_(maybe_unload_symbols)    ( a, len );
    }
 }
 
 
-/* Setting permissions for aligned words.  This supports fast stack
-   operations. */
-
-static __inline__ void make_aligned_word_NOACCESS ( Addr a )
+static
+void startup_segment_callback ( Addr start, UInt size, 
+                                Char rr, Char ww, Char xx, 
+                                UInt foffset, UChar* filename )
 {
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(50);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (1s). */
-   sm->abits[sm_off >> 3] |= mask;
-}
+   UInt r_esp;
+   Bool is_stack_segment;
 
-static __inline__ void make_aligned_word_WRITABLE ( Addr a )
-{
-   SecMap* sm;
-   UInt    sm_off;
-   UChar   mask;
-   PROF_EVENT(51);
-#  ifdef VG_DEBUG_MEMORY
-   vg_assert(IS_ALIGNED4_ADDR(a));
-#  endif
-   ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE");
-   sm     = VG_(primary_map)[a >> 16];
-   sm_off = a & 0xFFFF;
-   ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID;
-   mask = 0x0F;
-   mask <<= (a & 4 /* 100b */);   /* a & 4 is either 0 or 4 */
-   /* mask now contains 1s where we wish to make address bits
-      invalid (0s). */
-   sm->abits[sm_off >> 3] &= ~mask;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Functions called directly from generated code.       ---*/
-/*------------------------------------------------------------*/
-
-static __inline__ UInt rotateRight16 ( UInt x )
-{
-   /* Amazingly, gcc turns this into a single rotate insn. */
-   return (x >> 16) | (x << 16);
-}
-
-
-static __inline__ UInt shiftRight16 ( UInt x )
-{
-   return x >> 16;
-}
-
-
-/* Read/write 1/2/4 sized V bytes, and emit an address error if
-   needed. */
-
-/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast.
-   Under all other circumstances, it defers to the relevant _SLOWLY
-   function, which can handle all situations.
-*/
-UInt VG_(helperc_LOADV4) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V4_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(60);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V4_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV4) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V4_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x3FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   UChar   abits  = sm->abits[a_off];
-   abits >>= (a & 4);
-   abits &= 15;
-   PROF_EVENT(61);
-   if (abits == VGM_NIBBLE_VALID) {
-      /* Handle common case quickly: a is suitably aligned, is mapped,
-         and is addressible. */
-      UInt v_off = a & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V4_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV2) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V2_SLOWLY(a);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(62);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFF0000 
-             |  
-             (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V2_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV2) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V2_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = rotateRight16(a) & 0x1FFFF;
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(63);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V2_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-UInt VG_(helperc_LOADV1) ( Addr a )
-{
-#  ifdef VG_DEBUG_MEMORY
-   return vgm_rd_V1_SLOWLY(a);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(64);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      return 0xFFFFFF00
-             |
-             (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
-   } else {
-      /* Slow but general case. */
-      return vgm_rd_V1_SLOWLY(a);
-   }
-#  endif
-}
-
-void VG_(helperc_STOREV1) ( Addr a, UInt vbytes )
-{
-#  ifdef VG_DEBUG_MEMORY
-   vgm_wr_V1_SLOWLY(a, vbytes);
-#  else
-   UInt    sec_no = shiftRight16(a);
-   SecMap* sm     = VG_(primary_map)[sec_no];
-   UInt    a_off  = (a & 0xFFFF) >> 3;
-   PROF_EVENT(65);
-   if (sm->abits[a_off] == VGM_BYTE_VALID) {
-      /* Handle common case quickly. */
-      UInt v_off = a & 0xFFFF;
-      ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF;
-   } else {
-      /* Slow but general case. */
-      vgm_wr_V1_SLOWLY(a, vbytes);
-   }
-#  endif
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Fallback functions to handle cases that the above    ---*/
-/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage.           ---*/
-/*------------------------------------------------------------*/
-
-static UInt vgm_rd_V4_SLOWLY ( Addr a )
-{
-   Bool a0ok, a1ok, a2ok, a3ok;
-   UInt vb0, vb1, vb2, vb3;
-
-   PROF_EVENT(70);
-
-   /* First establish independently the addressibility of the 4 bytes
-      involved. */
-   a0ok = get_abit(a+0) == VGM_BIT_VALID;
-   a1ok = get_abit(a+1) == VGM_BIT_VALID;
-   a2ok = get_abit(a+2) == VGM_BIT_VALID;
-   a3ok = get_abit(a+3) == VGM_BIT_VALID;
-
-   /* Also get the validity bytes for the address. */
-   vb0 = (UInt)get_vbyte(a+0);
-   vb1 = (UInt)get_vbyte(a+1);
-   vb2 = (UInt)get_vbyte(a+2);
-   vb3 = (UInt)get_vbyte(a+3);
-
-   /* Now distinguish 3 cases */
-
-   /* Case 1: the address is completely valid, so:
-      - no addressing error
-      - return V bytes as read from memory
-   */
-   if (a0ok && a1ok && a2ok && a3ok) {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= vb3;
-      vw <<= 8; vw |= vb2;
-      vw <<= 8; vw |= vb1;
-      vw <<= 8; vw |= vb0;
-      return vw;
-   }
-
-   /* Case 2: the address is completely invalid.  
-      - emit addressing error
-      - return V word indicating validity.  
-      This sounds strange, but if we make loads from invalid addresses 
-      give invalid data, we also risk producing a number of confusing
-      undefined-value errors later, which confuses the fact that the
-      error arose in the first place from an invalid address. 
-   */
-   /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */
-   if (!VG_(clo_partial_loads_ok) 
-       || ((a & 3) != 0)
-       || (!a0ok && !a1ok && !a2ok && !a3ok)) {
-      VG_(record_address_error)( a, 4, False );
-      return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) 
-             | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID;
-   }
-
-   /* Case 3: the address is partially valid.  
-      - no addressing error
-      - returned V word is invalid where the address is invalid, 
-        and contains V bytes from memory otherwise. 
-      Case 3 is only allowed if VG_(clo_partial_loads_ok) is True
-      (which is the default), and the address is 4-aligned.  
-      If not, Case 2 will have applied.
-   */
-   vg_assert(VG_(clo_partial_loads_ok));
-   {
-      UInt vw = VGM_WORD_INVALID;
-      vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID);
-      vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID);
-      return vw;
-   }
-}
-
-static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(71);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+2) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+3) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+3, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 4, True );
-}
-
-static UInt vgm_rd_V2_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(72);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V bytes, remembering to do it little-endian-ly. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+1);
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 2, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(73);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-   if (get_abit(a+1) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8;
-   set_vbyte( a+1, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 2, True );
-}
-
-static UInt vgm_rd_V1_SLOWLY ( Addr a )
-{
-   /* Check the address for validity. */
-   UInt vw   = VGM_WORD_INVALID;
-   Bool aerr = False;
-   PROF_EVENT(74);
-
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Fetch the V byte. */
-   vw <<= 8; vw |= (UInt)get_vbyte(a+0);
-
-   /* If an address error has happened, report it. */
-   if (aerr) {
-      VG_(record_address_error)( a, 1, False );
-      vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) 
-           | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID);
-   }
-   return vw;   
-}
-
-static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes )
-{
-   /* Check the address for validity. */
-   Bool aerr = False;
-   PROF_EVENT(75);
-   if (get_abit(a+0) != VGM_BIT_VALID) aerr = True;
-
-   /* Store the V bytes, remembering to do it little-endian-ly. */
-   set_vbyte( a+0, vbytes & 0x000000FF );
-
-   /* If an address error has happened, report it. */
-   if (aerr)
-      VG_(record_address_error)( a, 1, True );
-}
-
-
-/* ---------------------------------------------------------------------
-   Called from generated code, or from the assembly helpers.
-   Handlers for value check failures.
-   ------------------------------------------------------------------ */
-
-void VG_(helperc_value_check0_fail) ( void )
-{
-   VG_(record_value_error) ( 0 );
-}
-
-void VG_(helperc_value_check1_fail) ( void )
-{
-   VG_(record_value_error) ( 1 );
-}
-
-void VG_(helperc_value_check2_fail) ( void )
-{
-   VG_(record_value_error) ( 2 );
-}
-
-void VG_(helperc_value_check4_fail) ( void )
-{
-   VG_(record_value_error) ( 4 );
-}
-
-
-/* ---------------------------------------------------------------------
-   FPU load and store checks, called from generated code.
-   ------------------------------------------------------------------ */
-
-void VGM_(fpu_read_check) ( Addr addr, Int size )
-{
-   /* Ensure the read area is both addressible and valid (ie,
-      readable).  If there's an address error, don't report a value
-      error too; but if there isn't an address error, check for a
-      value error. 
-
-      Try to be reasonably fast on the common case; wimp out and defer
-      to fpu_read_check_SLOWLY for everything else.  */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(80);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_read_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(81);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow4;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow4:
-      fpu_read_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(82);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible. */
-      v_off = addr & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) 
-         goto slow8;
-      /* Both halves properly aligned, addressible and with valid
-         data. */
-      return;
-     slow8:
-      fpu_read_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(83);
-      fpu_read_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(84);
-      fpu_read_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(84); /* XXX assign correct event number */
-      fpu_read_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_read_check: unhandled size");
-#  endif
-}
-
-
-void VGM_(fpu_write_check) ( Addr addr, Int size )
-{
-   /* Ensure the written area is addressible, and moan if otherwise.
-      If it is addressible, make it valid, otherwise invalid. 
-   */
-
-   SecMap* sm;
-   UInt    sm_off, v_off, a_off;
-   Addr    addr4;
-
-   PROF_EVENT(85);
-
-#  ifdef VG_DEBUG_MEMORY
-   fpu_write_check_SLOWLY ( addr, size );
-#  else
-
-   if (size == 4) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow4;
-      PROF_EVENT(86);
-      /* Properly aligned. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4;
-      /* Properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      return;
-     slow4:
-      fpu_write_check_SLOWLY ( addr, 4 );
-      return;
-   }
-
-   if (size == 8) {
-      if (!IS_ALIGNED4_ADDR(addr)) goto slow8;
-      PROF_EVENT(87);
-      /* Properly aligned.  Do it in two halves. */
-      addr4 = addr + 4;
-      /* First half. */
-      sm     = VG_(primary_map)[addr >> 16];
-      sm_off = addr & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* First half properly aligned and addressible.  Make valid. */
-      v_off = addr & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Second half. */
-      sm     = VG_(primary_map)[addr4 >> 16];
-      sm_off = addr4 & 0xFFFF;
-      a_off  = sm_off >> 3;
-      if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8;
-      /* Second half properly aligned and addressible. */
-      v_off = addr4 & 0xFFFF;
-      ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID;
-      /* Properly aligned, addressible and with valid data. */
-      return;
-     slow8:
-      fpu_write_check_SLOWLY ( addr, 8 );
-      return;
-   }
-
-   /* Can't be bothered to huff'n'puff to make these (allegedly) rare
-      cases go quickly.  */
-   if (size == 2) {
-      PROF_EVENT(88);
-      fpu_write_check_SLOWLY ( addr, 2 );
-      return;
-   }
-
-   if (size == 10) {
-      PROF_EVENT(89);
-      fpu_write_check_SLOWLY ( addr, 10 );
-      return;
-   }
-
-   if (size == 28) {
-      PROF_EVENT(89); /* XXX assign correct event number */
-      fpu_write_check_SLOWLY ( addr, 28 );
-      return;
-   }
-
-   VG_(printf)("size is %d\n", size);
-   VG_(panic)("vgm_fpu_write_check: unhandled size");
-#  endif
-}
-
-
-/* ---------------------------------------------------------------------
-   Slow, general cases for FPU load and store checks.
-   ------------------------------------------------------------------ */
-
-/* Generic version.  Test for both addr and value errors, but if
-   there's an addr error, don't report a value error even if it
-   exists. */
-
-void fpu_read_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Bool aerr = False;
-   Bool verr = False;
-   PROF_EVENT(90);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(91);
-      if (get_abit(addr+i) != VGM_BIT_VALID)
-         aerr = True;
-      if (get_vbyte(addr+i) != VGM_BYTE_VALID)
-         verr = True;
-   }
-
-   if (aerr) {
-      VG_(record_address_error)( addr, size, False );
-   } else {
-     if (verr)
-        VG_(record_value_error)( size );
-   }
-}
-
-
-/* Generic version.  Test for addr errors.  Valid addresses are
-   given valid values, and invalid addresses invalid values. */
-
-void fpu_write_check_SLOWLY ( Addr addr, Int size )
-{
-   Int  i;
-   Addr a_here;
-   Bool a_ok;
-   Bool aerr = False;
-   PROF_EVENT(92);
-   for (i = 0; i < size; i++) {
-      PROF_EVENT(93);
-      a_here = addr+i;
-      a_ok = get_abit(a_here) == VGM_BIT_VALID;
-      if (a_ok) {
-	set_vbyte(a_here, VGM_BYTE_VALID);
-      } else {
-	set_vbyte(a_here, VGM_BYTE_INVALID);
-        aerr = True;
+   /* Sanity check ... if this is the executable's text segment,
+      ensure it is loaded where we think it ought to be.  Any file
+      name which doesn't contain ".so" is assumed to be the
+      executable. */
+   if (filename != NULL
+       && xx == 'x'
+       && VG_(strstr(filename, ".so")) == NULL
+      ) {
+      /* We assume this is the executable. */
+      if (start != VG_ASSUMED_EXE_BASE) {
+         VG_(message)(Vg_UserMsg,
+                      "FATAL: executable base addr not as assumed.");
+         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
+                      filename, start, VG_ASSUMED_EXE_BASE);
+         VG_(message)(Vg_UserMsg,
+            "One reason this could happen is that you have a shared object");
+         VG_(message)(Vg_UserMsg,
+            " whose name doesn't contain the characters \".so\", so Valgrind ");
+         VG_(message)(Vg_UserMsg,
+            "naively assumes it is the executable.  ");
+         VG_(message)(Vg_UserMsg,
+            "In that case, rename it appropriately.");
+         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
       }
    }
-   if (aerr) {
-      VG_(record_address_error)( addr, size, True );
+
+   if (0)
+      VG_(message)(Vg_DebugMsg,
+                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
+                   start,start+size,rr,ww,xx,foffset,
+                   size, filename?filename:(UChar*)"NULL");
+
+   if (rr != 'r' && xx != 'x' && ww != 'w') {
+      VG_(printf)("No permissions on the segment named %s\n", filename);
+      VG_(panic)("Non-readable, writable, executable segment at startup");
    }
+
+   /* This parallels what happens when we mmap some new memory */
+   if (filename != NULL && xx == 'x') {
+      VG_(new_exe_segment)( start, size );
+   }
+   VG_TRACK( new_mem_startup, start, size, rr=='r', ww=='w', xx=='x' );
+
+   /* If this is the stack segment mark all below %esp as noaccess. */
+   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   is_stack_segment = start <= r_esp && r_esp < start+size;
+   if (is_stack_segment) {
+      if (0)
+         VG_(message)(Vg_DebugMsg, "invalidating stack area: %x .. %x",
+                      start,r_esp);
+      VG_TRACK( die_mem_stack, start, r_esp-start );
+   }
+}
+
+
+/* 1. Records exe segments from /proc/pid/maps -- always necessary, because 
+      if they're munmap()ed we need to know if they were executable in order
+      to discard translations.  Also checks there's no exe segment overlaps.
+
+   2. Marks global variables that might be accessed from generated code;
+
+   3. Sets up the end of the data segment so that vg_syscalls.c can make
+      sense of calls to brk().
+ */
+void VG_(init_memory) ( void )
+{
+   /* 1 and 2 */
+   VG_(read_procselfmaps) ( startup_segment_callback );
+
+   /* 3 */
+   VG_TRACK( post_mem_write, (Addr) & VG_(running_on_simd_CPU), 1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_trace_malloc),    1 );
+   VG_TRACK( post_mem_write, (Addr) & VG_(clo_sloppy_malloc),   1 );
+
+   /* 4 */
+   VG_(init_dataseg_end_for_brk)();
 }
 
 
@@ -1340,7 +273,7 @@
 Bool is_plausible_stack_addr ( ThreadState* tst, Addr aa )
 {
    UInt a = (UInt)aa;
-   PROF_EVENT(100);
+   //PROF_EVENT(100);   PPP
    if (a <= tst->stack_highest_word && 
        a > tst->stack_highest_word - VG_PLAUSIBLE_STACK_SIZE)
       return True;
@@ -1349,18 +282,6 @@
 }
 
 
-/* Is this address within some small distance below %ESP?  Used only
-   for the --workaround-gcc296-bugs kludge. */
-Bool VG_(is_just_below_ESP)( Addr esp, Addr aa )
-{
-   if ((UInt)esp > (UInt)aa
-       && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP)
-      return True;
-   else
-      return False;
-}
-
-
 /* Kludgey ... how much does %esp have to change before we reckon that
    the application is switching stacks ? */
 #define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4)
@@ -1370,133 +291,59 @@
    return a & ~(VKI_BYTES_PER_PAGE-1);
 }
 
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp );
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr );
-
-void VGM_(handle_esp_assignment) ( Addr new_espA )
+__attribute__ ((regparm (1)))
+void VG_(handle_esp_assignment) ( Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
+   UInt old_esp;
+   Int  delta;
 
-   PROF_EVENT(101);
+   VGP_MAYBE_PUSHCC(VgpStack);
+
+   old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
+   delta = ((Int)new_esp) - ((Int)old_esp);
+
+   /* Update R_ESP */
+   VG_(baseBlock)[VGOFF_(m_esp)] = new_esp;
+
+   //PROF_EVENT(101);   PPP
 
 #  ifndef VG_DEBUG_MEMORY
 
-   if (IS_ALIGNED4_ADDR(old_esp)) {
+   if (IS_ALIGNED4_ADDR(old_esp) &&  IS_ALIGNED4_ADDR(new_esp)) {
 
       /* Deal with the most common cases fast.  These are ordered in
          the sequence most common first. */
 
-      if (delta == -4) {
-         /* Moving down by 4 and properly aligned.. */
-         PROF_EVENT(102);
-         make_aligned_word_WRITABLE(new_esp);
-         return;
+#     ifdef VG_PROFILE_MEMORY
+      // PPP -- NB: must be '==', not '='; assignment here would clobber delta
+      if      (delta == - 4) PROF_EVENT(102);
+      else if (delta ==   4) PROF_EVENT(103);
+      else if (delta == -12) PROF_EVENT(104);
+      else if (delta == - 8) PROF_EVENT(105);
+      else if (delta ==  16) PROF_EVENT(106);
+      else if (delta ==  12) PROF_EVENT(107);
+      else if (delta ==   0) PROF_EVENT(108);
+      else if (delta ==   8) PROF_EVENT(109);
+      else if (delta == -16) PROF_EVENT(110);
+      else if (delta ==  20) PROF_EVENT(111);
+      else if (delta == -20) PROF_EVENT(112);
+      else if (delta ==  24) PROF_EVENT(113);
+      else if (delta == -24) PROF_EVENT(114);
+      else if (delta > 0)    PROF_EVENT(115); // PPP: new: aligned_big_pos
+      else                   PROF_EVENT(116); // PPP: new: aligned_big_neg
+#     endif
+      
+      
+      if (delta < 0) {
+         VG_TRACK(new_mem_stack_aligned, new_esp, -delta);
+      } else if (delta > 0) {
+         VG_TRACK(die_mem_stack_aligned, old_esp, delta);
       }
+      /* Do nothing if (delta==0) */
 
-      if (delta == 4) {
-         /* Moving up by 4 and properly aligned. */
-         PROF_EVENT(103);
-         make_aligned_word_NOACCESS(old_esp);
-         return;
-      }
-
-      if (delta == -12) {
-         PROF_EVENT(104);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         return;
-      }
-
-      if (delta == -8) {
-         PROF_EVENT(105);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         return;
-      }
-
-      if (delta == 16) {
-         PROF_EVENT(106);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         return;
-      }
-
-      if (delta == 12) {
-         PROF_EVENT(107);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         return;
-      }
-
-      if (delta == 0) {
-         PROF_EVENT(108);
-         return;
-      }
-
-      if (delta == 8) {
-         PROF_EVENT(109);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         return;
-      }
-
-      if (delta == -16) {
-         PROF_EVENT(110);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         return;
-      }
-
-      if (delta == 20) {
-         PROF_EVENT(111);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         return;
-      }
-
-      if (delta == -20) {
-         PROF_EVENT(112);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         return;
-      }
-
-      if (delta == 24) {
-         PROF_EVENT(113);
-         make_aligned_word_NOACCESS(old_esp);
-         make_aligned_word_NOACCESS(old_esp+4);
-         make_aligned_word_NOACCESS(old_esp+8);
-         make_aligned_word_NOACCESS(old_esp+12);
-         make_aligned_word_NOACCESS(old_esp+16);
-         make_aligned_word_NOACCESS(old_esp+20);
-         return;
-      }
-
-      if (delta == -24) {
-         PROF_EVENT(114);
-         make_aligned_word_WRITABLE(new_esp);
-         make_aligned_word_WRITABLE(new_esp+4);
-         make_aligned_word_WRITABLE(new_esp+8);
-         make_aligned_word_WRITABLE(new_esp+12);
-         make_aligned_word_WRITABLE(new_esp+16);
-         make_aligned_word_WRITABLE(new_esp+20);
-         return;
-      }
-
+      VGP_MAYBE_POPCC(VgpStack);
+      return;
    }
 
 #  endif
@@ -1504,33 +351,35 @@
    /* The above special cases handle 90% to 95% of all the stack
       adjustments.  The rest we give to the slow-but-general
       mechanism. */
-   vg_handle_esp_assignment_SLOWLY ( new_espA );
+   vg_handle_esp_assignment_SLOWLY ( old_esp, new_esp );
+   VGP_MAYBE_POPCC(VgpStack);
 }
 
 
-static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA )
+static void vg_handle_esp_assignment_SLOWLY ( Addr old_esp, Addr new_esp )
 {
-   UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   UInt new_esp = (UInt)new_espA;
-   Int  delta   = ((Int)new_esp) - ((Int)old_esp);
-   //   VG_(printf)("%d ", delta);
-   PROF_EVENT(120);
+   Int  delta;
+   
+   delta = ((Int)new_esp) - ((Int)old_esp);
+   //VG_(printf)("delta %d (%x) %x --> %x\n", delta, delta, old_esp, new_esp);
+   //PROF_EVENT(120);   PPP
    if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) {
       /* "Ordinary" stack change. */
       if (new_esp < old_esp) {
          /* Moving down; the stack is growing. */
-         PROF_EVENT(121);
-         VGM_(make_writable) ( new_esp, old_esp - new_esp );
-         return;
-      }
-      if (new_esp > old_esp) {
+         //PROF_EVENT(121); PPP
+         VG_TRACK( new_mem_stack, new_esp, -delta );
+      
+      } else if (new_esp > old_esp) {
          /* Moving up; the stack is shrinking. */
-         PROF_EVENT(122);
-         VGM_(make_noaccess) ( old_esp, new_esp - old_esp );
-         return;
+         //PROF_EVENT(122); PPP
+         VG_TRACK( die_mem_stack, old_esp, delta );
+
+      } else {
+         /* when old_esp == new_esp */
+         //PROF_EVENT(123);    PPP
       }
-      PROF_EVENT(123);
-      return; /* when old_esp == new_esp */
+      return;
    }
 
    /* %esp has changed by more than HUGE_DELTA.  We take this to mean
@@ -1552,863 +401,21 @@
      Addr valid_up_to     = get_page_base(new_esp) + VKI_BYTES_PER_PAGE
                             + 0 * VKI_BYTES_PER_PAGE;
      ThreadState* tst     = VG_(get_current_thread_state)();
-     PROF_EVENT(124);
+     //PROF_EVENT(124); PPP
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_UserMsg, "Warning: client switching stacks?  "
-                                 "%%esp: %p --> %p",
-                                  old_esp, new_esp);
+                                 "%%esp: %p --> %p", old_esp, new_esp);
      /* VG_(printf)("na %p,   %%esp %p,   wr %p\n",
                     invalid_down_to, new_esp, valid_up_to ); */
-     VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to );
+     VG_TRACK( die_mem_stack, invalid_down_to, new_esp - invalid_down_to );
      if (!is_plausible_stack_addr(tst, new_esp)) {
-        VGM_(make_readable) ( new_esp, valid_up_to - new_esp );
+        VG_TRACK( post_mem_write, new_esp, valid_up_to - new_esp );
      }
    }
 }
 
 
-/*--------------------------------------------------------------*/
-/*--- Initialise the memory audit system on program startup. ---*/
-/*--------------------------------------------------------------*/
-
-/* Handle one entry derived from /proc/self/maps. */
-
-static
-void init_memory_audit_callback ( 
-        Addr start, UInt size, 
-        Char rr, Char ww, Char xx, 
-        UInt foffset, UChar* filename )
-{
-   UChar example_a_bit;
-   UChar example_v_bit;
-   UInt  r_esp;
-   Bool  is_stack_segment;
-
-   /* Sanity check ... if this is the executable's text segment,
-      ensure it is loaded where we think it ought to be.  Any file
-      name which doesn't contain ".so" is assumed to be the
-      executable. */
-   if (filename != NULL
-       && xx == 'x'
-       && VG_(strstr(filename, ".so")) == NULL
-      ) {
-      /* We assume this is the executable. */
-      if (start != VG_ASSUMED_EXE_BASE) {
-         VG_(message)(Vg_UserMsg,
-                      "FATAL: executable base addr not as assumed.");
-         VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.",
-                      filename, start, VG_ASSUMED_EXE_BASE);
-         VG_(message)(Vg_UserMsg,
-            "One reason this could happen is that you have a shared object");
-         VG_(message)(Vg_UserMsg,
-            " whose name doesn't contain the characters \".so\", so Valgrind ");
-         VG_(message)(Vg_UserMsg,
-            "naively assumes it is the executable.  ");
-         VG_(message)(Vg_UserMsg,
-            "In that case, rename it appropriately.");
-         VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality");
-      }
-   }
-    
-   if (0)
-      VG_(message)(Vg_DebugMsg, 
-                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
-                   start,start+size,rr,ww,xx,foffset,
-                   size, filename?filename:(UChar*)"NULL");
-
-   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   is_stack_segment = start <= r_esp && r_esp < start+size;
-
-   /* Figure out the segment's permissions.
-
-      All segments are addressible -- since a process can read its
-      own text segment.
-
-      A read-but-not-write segment presumably contains initialised
-      data, so is all valid.  Read-write segments presumably contains
-      uninitialised data, so is all invalid.  */
-
-   /* ToDo: make this less bogus. */
-   if (rr != 'r' && xx != 'x' && ww != 'w') {
-      /* Very bogus; this path never gets taken. */
-      /* A no, V no */
-      example_a_bit = VGM_BIT_INVALID;
-      example_v_bit = VGM_BIT_INVALID;
-   } else {
-      /* A yes, V yes */
-      example_a_bit = VGM_BIT_VALID;
-      example_v_bit = VGM_BIT_VALID;
-      /* Causes a lot of errs for unknown reasons. 
-         if (filename is valgrind.so 
-               [careful about end conditions on filename]) {
-            example_a_bit = VGM_BIT_INVALID;
-            example_v_bit = VGM_BIT_INVALID;
-         }
-      */
-   }
-
-   set_address_range_perms ( start, size, 
-                             example_a_bit, example_v_bit );
-
-   if (is_stack_segment) {
-      /* This is the stack segment.  Mark all below %esp as
-         noaccess. */
-      if (0)
-         VG_(message)(Vg_DebugMsg, 
-                      "invalidating stack area: %x .. %x",
-                      start,r_esp);
-      VGM_(make_noaccess)( start, r_esp-start );
-   }
-}
-
-
-/* Initialise the memory audit system. */
-void VGM_(init_memory_audit) ( void )
-{
-   Int i;
-
-   init_prof_mem();
-
-   for (i = 0; i < 8192; i++)
-      vg_distinguished_secondary_map.abits[i] 
-         = VGM_BYTE_INVALID; /* Invalid address */
-   for (i = 0; i < 65536; i++)
-      vg_distinguished_secondary_map.vbyte[i] 
-         = VGM_BYTE_INVALID; /* Invalid Value */
-
-   /* These entries gradually get overwritten as the used address
-      space expands. */
-   for (i = 0; i < 65536; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-   /* These ones should never change; it's a bug in Valgrind if they
-      do. */
-   for (i = 65536; i < 262144; i++)
-      VG_(primary_map)[i] = &vg_distinguished_secondary_map;
-
-   /* Read the initial memory mapping from the /proc filesystem, and
-      set up our own maps accordingly. */
-   VG_(read_procselfmaps) ( init_memory_audit_callback );
-
-   /* Last but not least, set up the shadow regs with reasonable (sic)
-      values.  All regs are claimed to have valid values.
-   */
-   VG_(baseBlock)[VGOFF_(sh_esp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebp)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eax)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ecx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_ebx)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_esi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_edi)]    = VGM_WORD_VALID;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID;
-
-   /* Record the end of the data segment, so that vg_syscall_mem.c
-      can make sense of calls to brk(). 
-   */
-   VGM_(curr_dataseg_end) = (Addr)VG_(brk)(0);
-   if (VGM_(curr_dataseg_end) == (Addr)(-1))
-      VG_(panic)("vgm_init_memory_audit: can't determine data-seg end");
-
-   if (0)
-      VG_(printf)("DS END is %p\n", (void*)VGM_(curr_dataseg_end));
-
-   /* Read the list of errors to suppress.  This should be found in
-      the file specified by vg_clo_suppressions. */
-   VG_(load_suppressions)();
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Low-level address-space scanning, for the leak       ---*/
-/*--- detector.                                            ---*/
-/*------------------------------------------------------------*/
-
-static 
-jmp_buf memscan_jmpbuf;
-
-static
-void vg_scan_all_valid_memory_sighandler ( Int sigNo )
-{
-   __builtin_longjmp(memscan_jmpbuf, 1);
-}
-
-UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) )
-{
-   /* All volatile, because some gccs seem paranoid about longjmp(). */
-   volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified;
-   volatile Addr pageBase, addr;
-   volatile SecMap* sm;
-   volatile UChar abits;
-   volatile UInt page_first_word;
-
-   vki_ksigaction sigbus_saved;
-   vki_ksigaction sigbus_new;
-   vki_ksigaction sigsegv_saved;
-   vki_ksigaction sigsegv_new;
-   vki_ksigset_t  blockmask_saved;
-   vki_ksigset_t  unblockmask_new;
-
-   /* Temporarily install a new sigsegv and sigbus handler, and make
-      sure SIGBUS, SIGSEGV and SIGTERM are unblocked.  (Perhaps the
-      first two can never be blocked anyway?)  */
-
-   sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigbus_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigbus_new.ksa_mask );
-   vg_assert(res == 0);
-
-   sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler;
-   sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART;
-   sigsegv_new.ksa_restorer = NULL;
-   res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask );
-   vg_assert(res == 0+0);
-
-   res =  VG_(ksigemptyset)( &unblockmask_new );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV );
-   res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM );
-   vg_assert(res == 0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved );
-   vg_assert(res == 0+0+0+0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved );
-   vg_assert(res == 0+0+0+0+0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved );
-   vg_assert(res == 0+0+0+0+0+0);
-
-   /* The signal handlers are installed.  Actually do the memory scan. */
-   numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS);
-   vg_assert(numPages == 1048576);
-   vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS));
-
-   nWordsNotified = 0;
-
-   for (page = 0; page < numPages; page++) {
-      pageBase = page << VKI_BYTES_PER_PAGE_BITS;
-      primaryMapNo = pageBase >> 16;
-      sm = VG_(primary_map)[primaryMapNo];
-      if (IS_DISTINGUISHED_SM(sm)) continue;
-      if (__builtin_setjmp(memscan_jmpbuf) == 0) {
-         /* try this ... */
-         page_first_word = * (volatile UInt*)pageBase;
-         /* we get here if we didn't get a fault */
-         /* Scan the page */
-         for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) {
-            abits  = get_abits4_ALIGNED(addr);
-            vbytes = get_vbytes4_ALIGNED(addr);
-            if (abits == VGM_NIBBLE_VALID 
-                && vbytes == VGM_WORD_VALID) {
-               nWordsNotified++;
-               notify_word ( addr, *(UInt*)addr );
-	    }
-         }
-      } else {
-         /* We get here if reading the first word of the page caused a
-            fault, which in turn caused the signal handler to longjmp.
-            Ignore this page. */
-         if (0)
-         VG_(printf)(
-            "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n",
-            (void*)pageBase 
-         );
-      }
-   }
-
-   /* Restore signal state to whatever it was before. */
-   res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL );
-   vg_assert(res == 0 +0);
-
-   res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL );
-   vg_assert(res == 0 +0 +0);
-
-   res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL );
-   vg_assert(res == 0 +0 +0 +0);
-
-   return nWordsNotified;
-}
-
-
-/*------------------------------------------------------------*/
-/*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
-/*------------------------------------------------------------*/
-
-/* A block is either 
-   -- Proper-ly reached; a pointer to its start has been found
-   -- Interior-ly reached; only an interior pointer to it has been found
-   -- Unreached; so far, no pointers to any part of it have been found. 
-*/
-typedef 
-   enum { Unreached, Interior, Proper } 
-   Reachedness;
-
-/* A block record, used for generating err msgs. */
-typedef
-   struct _LossRecord {
-      struct _LossRecord* next;
-      /* Where these lost blocks were allocated. */
-      ExeContext*  allocated_at;
-      /* Their reachability. */
-      Reachedness  loss_mode;
-      /* Number of blocks and total # bytes involved. */
-      UInt         total_bytes;
-      UInt         num_blocks;
-   }
-   LossRecord;
-
-
-/* Find the i such that ptr points at or inside the block described by
-   shadows[i].  Return -1 if none found.  This assumes that shadows[]
-   has been sorted on the ->data field. */
-
-#ifdef VG_DEBUG_LEAKCHECK
-/* Used to sanity-check the fast binary-search mechanism. */
-static Int find_shadow_for_OLD ( Addr          ptr, 
-                                 ShadowChunk** shadows,
-                                 Int           n_shadows )
-
-{
-   Int  i;
-   Addr a_lo, a_hi;
-   PROF_EVENT(70);
-   for (i = 0; i < n_shadows; i++) {
-      PROF_EVENT(71);
-      a_lo = shadows[i]->data;
-      a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1;
-      if (a_lo <= ptr && ptr <= a_hi)
-         return i;
-   }
-   return -1;
-}
-#endif
-
-
-static Int find_shadow_for ( Addr          ptr, 
-                             ShadowChunk** shadows,
-                             Int           n_shadows )
-{
-   Addr a_mid_lo, a_mid_hi;
-   Int lo, mid, hi, retVal;
-   PROF_EVENT(70);
-   /* VG_(printf)("find shadow for %p = ", ptr); */
-   retVal = -1;
-   lo = 0;
-   hi = n_shadows-1;
-   while (True) {
-      PROF_EVENT(71);
-
-      /* invariant: current unsearched space is from lo to hi,
-         inclusive. */
-      if (lo > hi) break; /* not found */
-
-      mid      = (lo + hi) / 2;
-      a_mid_lo = shadows[mid]->data;
-      a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1;
-
-      if (ptr < a_mid_lo) {
-         hi = mid-1;
-         continue;
-      } 
-      if (ptr > a_mid_hi) {
-         lo = mid+1;
-         continue;
-      }
-      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
-      retVal = mid;
-      break;
-   }
-
-#  ifdef VG_DEBUG_LEAKCHECK
-   vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows ));
-#  endif
-   /* VG_(printf)("%d\n", retVal); */
-   return retVal;
-}
-
-
-
-static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows )
-{
-   Int   incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
-                      9841, 29524, 88573, 265720,
-                      797161, 2391484 };
-   Int          lo = 0;
-   Int          hi = n_shadows-1;
-   Int          i, j, h, bigN, hp;
-   ShadowChunk* v;
-
-   PROF_EVENT(72);
-   bigN = hi - lo + 1; if (bigN < 2) return;
-   hp = 0; while (incs[hp] < bigN) hp++; hp--;
-
-   for (; hp >= 0; hp--) {
-      PROF_EVENT(73);
-      h = incs[hp];
-      i = lo + h;
-      while (1) {
-         PROF_EVENT(74);
-         if (i > hi) break;
-         v = shadows[i];
-         j = i;
-         while (shadows[j-h]->data > v->data) {
-            PROF_EVENT(75);
-            shadows[j] = shadows[j-h];
-            j = j - h;
-            if (j <= (lo + h - 1)) break;
-         }
-         shadows[j] = v;
-         i++;
-      }
-   }
-}
-
-/* Globals, for the callback used by VG_(detect_memory_leaks). */
-
-static ShadowChunk** vglc_shadows;
-static Int           vglc_n_shadows;
-static Reachedness*  vglc_reachedness;
-static Addr          vglc_min_mallocd_addr;
-static Addr          vglc_max_mallocd_addr;
-
-static 
-void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a )
-{
-   Int  sh_no;
-   Addr ptr;
-
-   /* Rule out some known causes of bogus pointers.  Mostly these do
-      not cause much trouble because only a few false pointers can
-      ever lurk in these places.  This mainly stops it reporting that
-      blocks are still reachable in stupid test programs like this
-
-         int main (void) { char* a = malloc(100); return 0; }
-
-      which people seem inordinately fond of writing, for some reason.  
-
-      Note that this is a complete kludge.  It would be better to
-      ignore any addresses corresponding to valgrind.so's .bss and
-      .data segments, but I cannot think of a reliable way to identify
-      where the .bss segment has been put.  If you can, drop me a
-      line.  
-   */
-   if (a >= ((Addr)(&VG_(stack)))
-       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack))) {
-      return;
-   }
-   if (a >= ((Addr)(&VG_(m_state_static)))
-       && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static))) {
-      return;
-   }
-   if (a == (Addr)(&vglc_min_mallocd_addr))
-      return;
-   if (a == (Addr)(&vglc_max_mallocd_addr))
-      return;
-
-   /* OK, let's get on and do something Useful for a change. */
-
-   ptr = (Addr)word_at_a;
-   if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) {
-      /* Might be legitimate; we'll have to investigate further. */
-      sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows );
-      if (sh_no != -1) {
-         /* Found a block at/into which ptr points. */
-         vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows);
-         vg_assert(ptr < vglc_shadows[sh_no]->data 
-                         + vglc_shadows[sh_no]->size);
-         /* Decide whether Proper-ly or Interior-ly reached. */
-         if (ptr == vglc_shadows[sh_no]->data) {
-            if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a );
-            vglc_reachedness[sh_no] = Proper;
-         } else {
-            if (vglc_reachedness[sh_no] == Unreached)
-               vglc_reachedness[sh_no] = Interior;
-         }
-      }
-   }
-}
-
-
-void VG_(detect_memory_leaks) ( void )
-{
-   Int    i;
-   Int    blocks_leaked, bytes_leaked;
-   Int    blocks_dubious, bytes_dubious;
-   Int    blocks_reachable, bytes_reachable;
-   Int    n_lossrecords;
-   UInt   bytes_notified;
-   
-   LossRecord*  errlist;
-   LossRecord*  p;
-
-   Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* );
-   PROF_EVENT(76);
-   vg_assert(VG_(clo_instrument));
-
-   /* Decide how closely we want to match ExeContexts in leak
-      records. */
-   switch (VG_(clo_leak_resolution)) {
-      case 2: 
-         ec_comparer_fn = VG_(eq_ExeContext_top2); 
-         break;
-      case 4: 
-         ec_comparer_fn = VG_(eq_ExeContext_top4); 
-         break;
-      case VG_DEEPEST_BACKTRACE: 
-         ec_comparer_fn = VG_(eq_ExeContext_all); 
-         break;
-      default: 
-         VG_(panic)("VG_(detect_memory_leaks): "
-                    "bad VG_(clo_leak_resolution)");
-         break;
-   }
-
-   /* vg_get_malloc_shadows allocates storage for shadows */
-   vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows );
-   if (vglc_n_shadows == 0) {
-      vg_assert(vglc_shadows == NULL);
-      VG_(message)(Vg_UserMsg, 
-                   "No malloc'd blocks -- no leaks are possible.\n");
-      return;
-   }
-
-   VG_(message)(Vg_UserMsg, 
-                "searching for pointers to %d not-freed blocks.", 
-                vglc_n_shadows );
-   sort_malloc_shadows ( vglc_shadows, vglc_n_shadows );
-
-   /* Sanity check; assert that the blocks are now in order and that
-      they don't overlap. */
-   for (i = 0; i < vglc_n_shadows-1; i++) {
-      vg_assert( ((Addr)vglc_shadows[i]->data)
-                 < ((Addr)vglc_shadows[i+1]->data) );
-      vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size
-                 < ((Addr)vglc_shadows[i+1]->data) );
-   }
-
-   vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data);
-   vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data)
-                         + vglc_shadows[vglc_n_shadows-1]->size - 1;
-
-   vglc_reachedness 
-      = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) );
-   for (i = 0; i < vglc_n_shadows; i++)
-      vglc_reachedness[i] = Unreached;
-
-   /* Do the scan of memory. */
-   bytes_notified
-       = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr )
-         * VKI_BYTES_PER_WORD;
-
-   VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified);
-
-   blocks_leaked    = bytes_leaked    = 0;
-   blocks_dubious   = bytes_dubious   = 0;
-   blocks_reachable = bytes_reachable = 0;
-
-   for (i = 0; i < vglc_n_shadows; i++) {
-      if (vglc_reachedness[i] == Unreached) {
-         blocks_leaked++;
-         bytes_leaked += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Interior) {
-         blocks_dubious++;
-         bytes_dubious += vglc_shadows[i]->size;
-      }
-      else if (vglc_reachedness[i] == Proper) {
-         blocks_reachable++;
-         bytes_reachable += vglc_shadows[i]->size;
-      }
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-
-
-   /* Common up the lost blocks so we can print sensible error
-      messages. */
-
-   n_lossrecords = 0;
-   errlist       = NULL;
-   for (i = 0; i < vglc_n_shadows; i++) {
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->loss_mode == vglc_reachedness[i]
-             && ec_comparer_fn (
-                   p->allocated_at, 
-                   vglc_shadows[i]->where) ) {
-            break;
-	 }
-      }
-      if (p != NULL) {
-         p->num_blocks  ++;
-         p->total_bytes += vglc_shadows[i]->size;
-      } else {
-         n_lossrecords ++;
-         p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord));
-         p->loss_mode    = vglc_reachedness[i];
-         p->allocated_at = vglc_shadows[i]->where;
-         p->total_bytes  = vglc_shadows[i]->size;
-         p->num_blocks   = 1;
-         p->next         = errlist;
-         errlist         = p;
-      }
-   }
-   
-   for (i = 0; i < n_lossrecords; i++) {
-      LossRecord* p_min = NULL;
-      UInt        n_min = 0xFFFFFFFF;
-      for (p = errlist; p != NULL; p = p->next) {
-         if (p->num_blocks > 0 && p->total_bytes < n_min) {
-            n_min = p->total_bytes;
-            p_min = p;
-         }
-      }
-      vg_assert(p_min != NULL);
-
-      if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) {
-         p_min->num_blocks = 0;
-         continue;
-      }
-
-      VG_(message)(Vg_UserMsg, "");
-      VG_(message)(
-         Vg_UserMsg,
-         "%d bytes in %d blocks are %s in loss record %d of %d",
-         p_min->total_bytes, p_min->num_blocks,
-         p_min->loss_mode==Unreached ? "definitely lost" :
-            (p_min->loss_mode==Interior ? "possibly lost"
-                                        : "still reachable"),
-         i+1, n_lossrecords
-      );
-      VG_(pp_ExeContext)(p_min->allocated_at);
-      p_min->num_blocks = 0;
-   }
-
-   VG_(message)(Vg_UserMsg, "");
-   VG_(message)(Vg_UserMsg, "LEAK SUMMARY:");
-   VG_(message)(Vg_UserMsg, "   definitely lost: %d bytes in %d blocks.", 
-                            bytes_leaked, blocks_leaked );
-   VG_(message)(Vg_UserMsg, "   possibly lost:   %d bytes in %d blocks.", 
-                            bytes_dubious, blocks_dubious );
-   VG_(message)(Vg_UserMsg, "   still reachable: %d bytes in %d blocks.", 
-                            bytes_reachable, blocks_reachable );
-   if (!VG_(clo_show_reachable)) {
-      VG_(message)(Vg_UserMsg, 
-         "Reachable blocks (those to which a pointer was found) are not shown.");
-      VG_(message)(Vg_UserMsg, 
-         "To see them, rerun with: --show-reachable=yes");
-   }
-   VG_(message)(Vg_UserMsg, "");
-
-   VG_(free) ( VG_AR_PRIVATE, vglc_shadows );
-   VG_(free) ( VG_AR_PRIVATE, vglc_reachedness );
-}
-
-
-/* ---------------------------------------------------------------------
-   Sanity check machinery (permanently engaged).
-   ------------------------------------------------------------------ */
-
-/* Check that nobody has spuriously claimed that the first or last 16
-   pages (64 KB) of address space have become accessible.  Failure of
-   the following do not per se indicate an internal consistency
-   problem, but they are so likely to that we really want to know
-   about it if so. */
-
-Bool VG_(first_and_last_secondaries_look_plausible) ( void )
-{
-   if (IS_DISTINGUISHED_SM(VG_(primary_map)[0])
-       && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) {
-      return True;
-   } else {
-      return False;
-   }
-}
-
-
-/* A fast sanity check -- suitable for calling circa once per
-   millisecond. */
-
-void VG_(do_sanity_checks) ( Bool force_expensive )
-{
-   Int          i;
-   Bool         do_expensive_checks;
-
-   if (VG_(sanity_level) < 1) return;
-
-   /* --- First do all the tests that we can do quickly. ---*/
-
-   VG_(sanity_fast_count)++;
-
-   /* Check that we haven't overrun our private stack. */
-   for (i = 0; i < 10; i++) {
-      vg_assert(VG_(stack)[i]
-                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
-      vg_assert(VG_(stack)[10000-1-i] 
-                == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321));
-   }
-
-   /* Check stuff pertaining to the memory check system. */
-
-   if (VG_(clo_instrument)) {
-
-      /* Check that nobody has spuriously claimed that the first or
-         last 16 pages of memory have become accessible [...] */
-      vg_assert(VG_(first_and_last_secondaries_look_plausible)());
-   }
-
-   /* --- Now some more expensive checks. ---*/
-
-   /* Once every 25 times, check some more expensive stuff. */
-
-   do_expensive_checks = False;
-   if (force_expensive) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) > 1) 
-      do_expensive_checks = True;
-   if (VG_(sanity_level) == 1 
-       && (VG_(sanity_fast_count) % 25) == 0)
-      do_expensive_checks = True;
-
-   if (do_expensive_checks) {
-      VG_(sanity_slow_count)++;
-
-#     if 0
-      { void zzzmemscan(void); zzzmemscan(); }
-#     endif
-
-      if ((VG_(sanity_fast_count) % 250) == 0)
-         VG_(sanity_check_tc_tt)();
-
-      if (VG_(clo_instrument)) {
-         /* Make sure nobody changed the distinguished secondary. */
-         for (i = 0; i < 8192; i++)
-            vg_assert(vg_distinguished_secondary_map.abits[i] 
-                      == VGM_BYTE_INVALID);
-         for (i = 0; i < 65536; i++)
-            vg_assert(vg_distinguished_secondary_map.vbyte[i] 
-                      == VGM_BYTE_INVALID);
-
-         /* Make sure that the upper 3/4 of the primary map hasn't
-            been messed with. */
-         for (i = 65536; i < 262144; i++)
-            vg_assert(VG_(primary_map)[i] 
-                      == & vg_distinguished_secondary_map);
-      }
-      /* 
-      if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); 
-      */
-   }
-
-   if (VG_(sanity_level) > 1) {
-      /* Check sanity of the low-level memory manager.  Note that bugs
-         in the client's code can cause this to fail, so we don't do
-         this check unless specially asked for.  And because it's
-         potentially very expensive. */
-      VG_(mallocSanityCheckAll)();
-   }
-}
-
-
-/* ---------------------------------------------------------------------
-   Debugging machinery (turn on to debug).  Something of a mess.
-   ------------------------------------------------------------------ */
-
-/* Print the value tags on the 8 integer registers & flag reg. */
-
-static void uint_to_bits ( UInt x, Char* str )
-{
-   Int i;
-   Int w = 0;
-   /* str must point to a space of at least 36 bytes. */
-   for (i = 31; i >= 0; i--) {
-      str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0';
-      if (i == 24 || i == 16 || i == 8)
-         str[w++] = ' ';
-   }
-   str[w++] = 0;
-   vg_assert(w == 36);
-}
-
-/* Caution!  Not vthread-safe; looks in VG_(baseBlock), not the thread
-   state table. */
-
-void VG_(show_reg_tags) ( void )
-{
-   Char buf1[36];
-   Char buf2[36];
-   UInt z_eax, z_ebx, z_ecx, z_edx, 
-        z_esi, z_edi, z_ebp, z_esp, z_eflags;
-
-   z_eax    = VG_(baseBlock)[VGOFF_(sh_eax)];
-   z_ebx    = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   z_ecx    = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   z_edx    = VG_(baseBlock)[VGOFF_(sh_edx)];
-   z_esi    = VG_(baseBlock)[VGOFF_(sh_esi)];
-   z_edi    = VG_(baseBlock)[VGOFF_(sh_edi)];
-   z_ebp    = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   z_esp    = VG_(baseBlock)[VGOFF_(sh_esp)];
-   z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
-   
-   uint_to_bits(z_eflags, buf1);
-   VG_(message)(Vg_DebugMsg, "efl %\n", buf1);
-
-   uint_to_bits(z_eax, buf1);
-   uint_to_bits(z_ebx, buf2);
-   VG_(message)(Vg_DebugMsg, "eax %s   ebx %s\n", buf1, buf2);
-
-   uint_to_bits(z_ecx, buf1);
-   uint_to_bits(z_edx, buf2);
-   VG_(message)(Vg_DebugMsg, "ecx %s   edx %s\n", buf1, buf2);
-
-   uint_to_bits(z_esi, buf1);
-   uint_to_bits(z_edi, buf2);
-   VG_(message)(Vg_DebugMsg, "esi %s   edi %s\n", buf1, buf2);
-
-   uint_to_bits(z_ebp, buf1);
-   uint_to_bits(z_esp, buf2);
-   VG_(message)(Vg_DebugMsg, "ebp %s   esp %s\n", buf1, buf2);
-}
-
-
-#if 0
-/* For debugging only.  Scan the address space and touch all allegedly
-   addressible words.  Useful for establishing where Valgrind's idea of
-   addressibility has diverged from what the kernel believes. */
-
-static 
-void zzzmemscan_notify_word ( Addr a, UInt w )
-{
-}
-
-void zzzmemscan ( void )
-{
-   Int n_notifies
-      = VG_(scan_all_valid_memory)( zzzmemscan_notify_word );
-   VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies );
-}
-#endif
-
-
-
-
-#if 0
-static Int zzz = 0;
-
-void show_bb ( Addr eip_next )
-{
-   VG_(printf)("[%4d] ", zzz);
-   VG_(show_reg_tags)( &VG_(m_shadow );
-   VG_(translate) ( eip_next, NULL, NULL, NULL );
-}
-#endif /* 0 */
-
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
+
diff --git a/vg_messages.c b/vg_messages.c
index 3eaf8cd..b0051bd 100644
--- a/vg_messages.c
+++ b/vg_messages.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/vg_mylibc.c b/vg_mylibc.c
index e32aee8..3fe6032 100644
--- a/vg_mylibc.c
+++ b/vg_mylibc.c
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -177,7 +177,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW));
@@ -188,7 +188,7 @@
 {
    if (set == NULL)
       return -1;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return -1;
    signum--;
    set->ws[signum / VKI_KNSIG_BPW] &= ~(1 << (signum % VKI_KNSIG_BPW));
@@ -199,7 +199,7 @@
 {
    if (set == NULL)
       return 0;
-   if (signum < 1 && signum > VKI_KNSIG)
+   if (signum < 1 || signum > VKI_KNSIG)
       return 0;
    signum--;
    if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW)))
@@ -675,22 +675,49 @@
 }
 
 
-Long VG_(atoll36) ( Char* str )
+Long VG_(atoll16) ( Char* str )
 {
    Bool neg = False;
    Long n = 0;
    if (*str == '-') { str++; neg = True; };
    while (True) {
       if (*str >= '0' && *str <= '9') {
-         n = 36*n + (Long)(*str - '0');
+         n = 16*n + (Long)(*str - '0');
       }
       else 
-      if (*str >= 'A' && *str <= 'Z') {
-         n = 36*n + (Long)((*str - 'A') + 10);
+      if (*str >= 'A' && *str <= 'F') {
+         n = 16*n + (Long)((*str - 'A') + 10);
       }
       else 
-      if (*str >= 'a' && *str <= 'z') {
-         n = 36*n + (Long)((*str - 'a') + 10);
+      if (*str >= 'a' && *str <= 'f') {
+         n = 16*n + (Long)((*str - 'a') + 10);
+      }
+      else {
+	break;
+      }
+      str++;
+   }
+   if (neg) n = -n;
+   return n;
+}
+
+Long VG_(atoll36) ( UInt base, Char* str )
+{
+   Bool neg = False;
+   Long n = 0;
+   vg_assert(base >= 2 && base <= 36);
+   if (*str == '-') { str++; neg = True; };
+   while (True) {
+      if (*str >= '0' && *str <=('9' - (10 - base))) {
+         n = base*n + (Long)(*str - '0');
+      }
+      else 
+      if (base > 10 && *str >= 'A' && *str <= ('Z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'A') + 10);
+      }
+      else 
+      if (base > 10 && *str >= 'a' && *str <= ('z' - (36 - base))) {
+         n = base*n + (Long)((*str - 'a') + 10);
       }
       else {
 	break;
@@ -763,9 +790,18 @@
 }
 
 
-void VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
+Char* VG_(strncpy) ( Char* dest, const Char* src, Int ndest )
 {
-   VG_(strncpy_safely)( dest, src, ndest+1 ); 
+   Int i = 0;
+   while (True) {
+      if (i >= ndest) return dest;     /* reached limit */
+      dest[i] = src[i];
+      if (src[i++] == 0) {
+         /* reached NUL;  pad rest with zeroes as required */
+         while (i < ndest) dest[i++] = 0;
+         return dest;
+      }
+   }
 }
 
 
@@ -868,16 +904,22 @@
 }
 
 
-Char* VG_(strdup) ( ArenaId aid, const Char* s )
+/* Inline just for the wrapper VG_(strdup) below */
+__inline__ Char* VG_(arena_strdup) ( ArenaId aid, const Char* s )
 {
-    Int   i;
-    Int   len = VG_(strlen)(s) + 1;
-    Char* res = VG_(malloc) (aid, len);
-    for (i = 0; i < len; i++)
-       res[i] = s[i];
-    return res;
+   Int   i;
+   Int   len = VG_(strlen)(s) + 1;
+   Char* res = VG_(arena_malloc) (aid, len);
+   for (i = 0; i < len; i++)
+      res[i] = s[i];
+   return res;
 }
 
+/* Wrapper to avoid exposing skins to ArenaId's */
+Char* VG_(strdup) ( const Char* s )
+{
+   return VG_(arena_strdup) ( VG_AR_SKIN, s ); 
+}
 
 /* ---------------------------------------------------------------------
    A simple string matching routine, purloined from Hugs98.
@@ -966,66 +1008,32 @@
    VG_(exit)(1);
 }
 
+void VG_(skin_error) ( Char* str )
+{
+   VG_(printf)("\n%s: misconfigured skin:\n   %s\n\n", VG_(needs).name, str);
+   //VG_(printf)("Please report this bug to me at: %s\n\n", VG_EMAIL_ADDR);
+   VG_(shutdown_logging)();
+   VG_(exit)(1);
+}
+
 
 /* ---------------------------------------------------------------------
    Primitive support for reading files.
    ------------------------------------------------------------------ */
 
 /* Returns -1 on failure. */
-Int VG_(open_read) ( Char* pathname )
-{
+Int VG_(open) ( const Char* pathname, Int flags, Int mode )
+{  
    Int fd;
-   /* VG_(printf)("vg_open_read %s\n", pathname ); */
 
+   /* (old comment, not sure if it still applies  NJN 2002-sep-09) */
    /* This gets a segmentation fault if pathname isn't a valid file.
       I don't know why.  It seems like the call to open is getting
       intercepted and messed with by glibc ... */
    /* fd = open( pathname, O_RDONLY ); */
    /* ... so we go direct to the horse's mouth, which seems to work
       ok: */
-   const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) fd = -1;
-   return fd;
-}
-
-/* Returns -1 on failure. */
-static Int VG_(chmod_u_rw) ( Int fd )
-{
-   Int res;
-   const int O_IRUSR_IWUSR = 000600; /* See /usr/include/cpio.h */
-   res = vg_do_syscall2(__NR_fchmod, fd, O_IRUSR_IWUSR);
-   if (VG_(is_kerror)(res)) res = -1;
-   return res;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(create_and_write) ( Char* pathname )
-{
-   Int fd;
-
-   const int O_CR_AND_WR_ONLY = 0101; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_CR_AND_WR_ONLY, 0);
-   /* VG_(printf)("result = %d\n", fd); */
-   if (VG_(is_kerror)(fd)) {
-      fd = -1;
-   } else {
-      VG_(chmod_u_rw)(fd);
-      if (VG_(is_kerror)(fd)) {
-         fd = -1;
-      }
-   }
-   return fd;
-}
- 
-/* Returns -1 on failure. */
-Int VG_(open_write) ( Char* pathname )
-{  
-   Int fd;
-
-   const int O_WRONLY_AND_TRUNC = 01001; /* See /usr/include/bits/fcntl.h */
-   fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_WRONLY_AND_TRUNC, 0);
+   fd = vg_do_syscall3(__NR_open, (UInt)pathname, flags, mode);
    /* VG_(printf)("result = %d\n", fd); */
    if (VG_(is_kerror)(fd)) {
       fd = -1;
@@ -1068,7 +1076,7 @@
 /* Misc functions looking for a proper home. */
 
 /* We do getenv without libc's help by snooping around in
-   VG_(client_env) as determined at startup time. */
+   VG_(client_envp) as determined at startup time. */
 Char* VG_(getenv) ( Char* varname )
 {
    Int i, n;
@@ -1266,11 +1274,40 @@
             tot_alloc, nBytes, p, ((char*)p) + nBytes - 1, who );
       return p;
    }
-   VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", 
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): request for %d bytes failed.\n", 
                nBytes);
-   VG_(panic)("vg_get_memory_from_mmap: out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("VG_(get_memory_from_mmap): %d bytes already allocated.\n", 
+               tot_alloc);
+   VG_(printf)("\n");
+   VG_(printf)("This may mean that you have run out of swap space,\n");
+   VG_(printf)("since running programs on valgrind increases their memory\n");
+   VG_(printf)("usage at least 3 times.  You might want to use 'top'\n");
+   VG_(printf)("to determine whether you really have run out of swap.\n");
+   VG_(printf)("If so, you may be able to work around it by adding a\n");
+   VG_(printf)("temporary swap file -- this is easier than finding a\n");
+   VG_(printf)("new swap partition.  Go ask your sysadmin(s) [politely!]\n");
+   VG_(printf)("\n");
+   VG_(printf)("VG_(get_memory_from_mmap): out of memory!  Fatal!  Bye!\n");
+   VG_(printf)("\n");
+   VG_(exit)(1);
 }
 
+/* ---------------------------------------------------------------------
+   Generally useful...
+   ------------------------------------------------------------------ */
+
+Int VG_(log2) ( Int x ) 
+{
+   Int i;
+   /* Any more than 32 and we overflow anyway... */
+   for (i = 0; i < 32; i++) {
+      if (1 << i == x) return i;
+   }
+   return -1;
+}
+
+
 
 /*--------------------------------------------------------------------*/
 /*--- end                                              vg_mylibc.c ---*/
diff --git a/vg_none.c b/vg_none.c
new file mode 100644
index 0000000..398b88e
--- /dev/null
+++ b/vg_none.c
@@ -0,0 +1,57 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The null skin.                                     vg_none.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2002 Nicholas Nethercote
+      njn25@cam.ac.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_skin.h"
+
+
+void SK_(pre_clo_init)(VgNeeds* needs, VgTrackEvents* track) 
+{
+   needs->name                    = "nulgrind";
+   needs->description             = "a binary JIT-compiler";
+
+   /* No needs, no core events to track */
+}
+
+void SK_(post_clo_init)(void)
+{
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr a)
+{
+    return cb;
+}
+
+void SK_(fini)(void)
+{
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                vg_none.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_procselfmaps.c b/vg_procselfmaps.c
index ceba7b3..840f34b 100644
--- a/vg_procselfmaps.c
+++ b/vg_procselfmaps.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
@@ -102,7 +102,7 @@
    UChar  rr, ww, xx, pp, ch;
 
    /* Read the initial memory mapping from the /proc filesystem. */
-   fd = VG_(open_read) ( "/proc/self/maps" );
+   fd = VG_(open) ( "/proc/self/maps", VKI_O_RDONLY, 0 );
    if (fd == -1) {
       VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps");
       VG_(exit)(1);
@@ -172,6 +172,7 @@
        VG_(exit)(1);
 
     read_line_ok:
+
       /* Try and find the name of the file mapped to this segment, if
          it exists. */
       while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++;
diff --git a/vg_profile.c b/vg_profile.c
index 34e98d6..4b0288d 100644
--- a/vg_profile.c
+++ b/vg_profile.c
@@ -1,6 +1,8 @@
 
 /*--------------------------------------------------------------------*/
-/*--- Profiling machinery -- not for release builds!               ---*/
+/*--- Profiling machinery.  #include this file into a skin to      ---*/
+/*--- enable --profile=yes, but not for release versions of skins, ---*/
+/*--- because it uses glibc code.                                  ---*/
 /*---                                                 vg_profile.c ---*/
 /*--------------------------------------------------------------------*/
 
@@ -26,46 +28,84 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
-#include "vg_include.h"
+#ifndef __VG_PROFILE_C
+#define __VG_PROFILE_C
 
-#ifdef VG_PROFILE
+#include "vg_skin.h"
 
 /* get rid of these, if possible */
 #include <signal.h>
 #include <sys/time.h>
 
-#define VGP_PAIR(enumname,str) str
-static const Char* vgp_names[VGP_M_CCS] = { VGP_LIST };
-#undef VGP_PAIR
+/* Override the empty definitions from vg_skin.h */
+#undef  VGP_PUSHCC
+#undef  VGP_POPCC
+#define VGP_PUSHCC(x)   if (VG_(clo_profile)) VGP_(pushcc)(x)
+#define VGP_POPCC(x)    if (VG_(clo_profile)) VGP_(popcc)(x)
 
+#define VGP_M_STACK     20
+#define VGP_MAX_CCS     50
+
+
+/* All zeroed initially because they're static */
 static Int   vgp_nticks;
-static Int   vgp_counts[VGP_M_CCS];
-static Int   vgp_entries[VGP_M_CCS];
+
+static Int   vgp_counts [VGP_MAX_CCS];
+static Int   vgp_entries[VGP_MAX_CCS];
+static Char* vgp_names  [VGP_MAX_CCS];
 
 static Int   vgp_sp;
-static VgpCC vgp_stack[VGP_M_STACK];
+static UInt  vgp_stack[VGP_M_STACK];
+
+/* These definitions override the panicking ones in vg_profile.c */
+
+void VGP_(register_profile_event) ( Int n, Char* name )
+{
+   /* Adjust for negative values */
+   n += VgpUnc;
+   if (n >= VGP_MAX_CCS) {
+      VG_(printf)("\nProfile event #%d higher than VGP_MAX_CCS of %d.\n"
+                  "If you really need this many profile events, increase\n"
+                  "VGP_MAX_CCS and recompile Valgrind.\n",
+                  n, VGP_MAX_CCS);
+      VG_(skin_error)("profile event too high");
+   }
+   if (vgp_names[n] != NULL) {
+      VG_(printf)("\nProfile event #%d being registered as `%s'\n"
+                  "already registered as `%s'.\n"
+                  "Note that skin and core event numbers must not overlap.\n",
+                  n, name, vgp_names[n]);
+      VG_(skin_error)("profile event already registered");
+   }
+
+   vgp_names[n] = name;
+}
 
 void VGP_(tick) ( int sigNo )
 {
    Int cc;
    vgp_nticks++;
    cc = vgp_stack[vgp_sp];
-   vg_assert(cc >= 0 && cc < VGP_M_CCS);
+   vg_assert(cc >= 0 && cc < VGP_MAX_CCS);
    vgp_counts[ cc ]++;
 }
 
 void VGP_(init_profiling) ( void )
 {
    struct itimerval value;
-   Int i, ret;
+   Int ret;
 
-   for (i = 0; i < VGP_M_CCS; i++)
-     vgp_counts[i] = vgp_entries[i] = 0;
+   /* Register core events... tricky macro definition causes
+      VGP_(register_profile_event)() to be called once for each core event
+      in VGP_CORE_LIST. */
+   vg_assert(VgpUnc == 0);
+#  define VGP_PAIR(n,name) VGP_(register_profile_event)(n,name)
+   VGP_CORE_LIST;
+#  undef  VGP_PAIR
 
-   vgp_nticks = 0;
    vgp_sp = -1;
    VGP_(pushcc) ( VgpUnc );
 
@@ -81,30 +121,52 @@
 void VGP_(done_profiling) ( void )
 {
    Int i;
-   VG_(printf)("Profiling done, %d ticks\n", vgp_nticks);
-   for (i = 0; i < VGP_M_CCS; i++)
-      VG_(printf)("%2d: %4d (%3d %%%%) ticks,  %8d entries   for  %s\n",
-                  i, vgp_counts[i], 
-                  (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks),
-                  vgp_entries[i],
-                  vgp_names[i] );
+   VG_(printf)("\nProfiling done, %d ticks\n", vgp_nticks);
+   for (i = 0; i < VGP_MAX_CCS; i++)
+      if (NULL != vgp_names[i])
+         VG_(printf)(
+            "%2d: %4d (%3d %%%%) ticks,  %10d entries   for  %s\n",
+            i, vgp_counts[i], 
+            (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks),
+            vgp_entries[i], vgp_names[i] );
 }
 
-void VGP_(pushcc) ( VgpCC cc )
+void VGP_(pushcc) ( UInt cc )
 {
-   if (vgp_sp >= VGP_M_STACK-1) VG_(panic)("vgp_pushcc");
+   if (vgp_sp >= VGP_M_STACK-1) { 
+      VG_(printf)(
+         "\nMaximum profile stack depth (%d) reached for event #%d (`%s').\n"
+         "This is probably due to a VGP_(pushcc)() without a matching\n"
+         "VGP_(popcc)().  Make sure they all match.\n"
+         "Or if you are nesting profiling events very deeply, increase\n"
+         "VGP_M_STACK and recompile Valgrind.\n",
+         VGP_M_STACK, cc, vgp_names[cc]);
+      VG_(skin_error)("Profiling stack overflow");
+   }
    vgp_sp++;
    vgp_stack[vgp_sp] = cc;
    vgp_entries[ cc ] ++;
 }
 
-void VGP_(popcc) ( void )
+void VGP_(popcc) ( UInt cc )
 {
-   if (vgp_sp <= 0) VG_(panic)("vgp_popcc");
+   if (vgp_sp <= 0) {
+      VG_(printf)(
+         "\nProfile stack underflow.  This is due to a VGP_(popcc)() without\n"
+         "a matching VGP_(pushcc)().  Make sure they all match.\n");
+      VG_(skin_error)("Profiling stack underflow");
+   }
+   if (vgp_stack[vgp_sp] != cc) {
+      Int i;
+      VG_(printf)("popping %s, stack looks like:\n", vgp_names[cc]);
+      for (i = vgp_sp; i >= 0; i--)
+         VG_(printf)("%2d: %s\n", i, vgp_names[vgp_stack[i]]);
+      VG_(exit)(1);
+   }
    vgp_sp--;
 }
 
-#endif /* VG_PROFILE */
+#endif /* __VG_PROFILE_C */
 
 /*--------------------------------------------------------------------*/
 /*--- end                                             vg_profile.c ---*/
diff --git a/vg_scheduler.c b/vg_scheduler.c
index 0ad56b1..b65426b 100644
--- a/vg_scheduler.c
+++ b/vg_scheduler.c
@@ -25,13 +25,12 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
-#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and
-                         VG_USERREQ__DO_LEAK_CHECK */
+#include "valgrind.h" /* for VG_USERREQ__RUNNING_ON_VALGRIND and
+                             VG_USERREQ__DISCARD_TRANSLATIONS */
 
 /* BORKAGE/ISSUES as of 29 May 02
 
@@ -126,6 +125,10 @@
          happens, this entire record is marked as no longer in use, by
          making the fd field be -1.  */
       Bool     ready; 
+
+      /* The result from SK_(pre_blocking_syscall)();  is passed to
+       * SK_(post_blocking_syscall)(). */
+      void*    pre_result;
    }
    VgWaitedOnFd;
 
@@ -149,12 +152,13 @@
 typedef UInt ThreadKey;
 
 
+UInt VG_(written_shadow_reg);
+
 /* Forwards */
 static void do_client_request ( ThreadId tid );
 static void scheduler_sanity ( void );
 static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid );
 
-
 /* ---------------------------------------------------------------------
    Helper functions for the scheduler.
    ------------------------------------------------------------------ */
@@ -181,11 +185,12 @@
 
 
 /* For constructing error messages only: try and identify a thread
-   whose stack this address currently falls within, or return
-   VG_INVALID_THREADID if it doesn't.  A small complication is dealing
-   with any currently VG_(baseBlock)-resident thread. 
+   whose stack satisfies the predicate p, or return VG_INVALID_THREADID
+   if none do.  A small complication is dealing with any currently
+   VG_(baseBlock)-resident thread. 
 */
-ThreadId VG_(identify_stack_addr)( Addr a )
+ThreadId VG_(any_matching_thread_stack)
+              ( Bool (*p) ( Addr stack_min, Addr stack_max ))
 {
    ThreadId tid, tid_to_skip;
 
@@ -195,8 +200,8 @@
       VG_(baseBlock). */
    if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) {
       tid = vg_tid_currently_in_baseBlock;
-      if (VG_(baseBlock)[VGOFF_(m_esp)] <= a
-          && a <= VG_(threads)[tid].stack_highest_word) 
+      if ( p ( VG_(baseBlock)[VGOFF_(m_esp)], 
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
       else
          tid_to_skip = tid;
@@ -205,8 +210,8 @@
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       if (VG_(threads)[tid].status == VgTs_Empty) continue;
       if (tid == tid_to_skip) continue;
-      if (VG_(threads)[tid].m_esp <= a 
-          && a <= VG_(threads)[tid].stack_highest_word)
+      if ( p ( VG_(threads)[tid].m_esp,
+               VG_(threads)[tid].stack_highest_word) )
          return tid;
    }
    return VG_INVALID_THREADID;
@@ -238,14 +243,16 @@
                   VG_(threads)[i].associated_mx,
                   VG_(threads)[i].associated_cv );
       VG_(pp_ExeContext)( 
-         VG_(get_ExeContext)( False, VG_(threads)[i].m_eip, 
-                                     VG_(threads)[i].m_ebp ));
+         VG_(get_ExeContext2)( VG_(threads)[i].m_eip, VG_(threads)[i].m_ebp,
+                               VG_(threads)[i].m_esp, 
+                               VG_(threads)[i].stack_highest_word)
+      );
    }
    VG_(printf)("\n");
 }
 
 static
-void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no )
+void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no, void* pre_res )
 {
    Int i;
 
@@ -265,6 +272,7 @@
    vg_waiting_fds[i].tid        = tid;
    vg_waiting_fds[i].ready      = False;
    vg_waiting_fds[i].syscall_no = syscall_no;
+   vg_waiting_fds[i].pre_result = pre_res;
 }
 
 
@@ -325,7 +333,7 @@
                        ( trans_addr, trans_size );
    tte.mru_epoch  = VG_(current_epoch);
    /* Free the intermediary -- was allocated by VG_(emit_code). */
-   VG_(jitfree)( (void*)trans_addr );
+   VG_(arena_free)( VG_AR_JITTER, (void*)trans_addr );
    /* Add to trans tab and set back pointer. */
    VG_(add_to_trans_tab) ( &tte );
    /* Update stats. */
@@ -353,6 +361,11 @@
    /*NOTREACHED*/
 }
 
+ThreadState* VG_(get_ThreadState)( ThreadId tid )
+{
+   vg_assert(tid >= 0 && tid < VG_N_THREADS);
+   return & VG_(threads)[tid];
+}
 
 ThreadState* VG_(get_current_thread_state) ( void )
 {
@@ -367,6 +380,15 @@
    return vg_tid_currently_in_baseBlock;
 }
 
+ThreadId VG_(get_current_tid_1_if_root) ( void )
+{
+   if (0 == vg_tid_currently_in_baseBlock)
+      return 1;     /* root thread */
+    
+   vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock));
+   return vg_tid_currently_in_baseBlock;
+}
+
 
 /* Copy the saved state of a thread into VG_(baseBlock), ready for it
    to be run. */
@@ -390,15 +412,31 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(baseBlock)[VGOFF_(m_fpustate) + i] = VG_(threads)[tid].m_fpu[i];
 
-   VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
-   VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
-   VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
-   VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
-   VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
-   VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
-   VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
-   VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
-   VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   if (VG_(needs).shadow_regs) {
+      VG_(baseBlock)[VGOFF_(sh_eax)] = VG_(threads)[tid].sh_eax;
+      VG_(baseBlock)[VGOFF_(sh_ebx)] = VG_(threads)[tid].sh_ebx;
+      VG_(baseBlock)[VGOFF_(sh_ecx)] = VG_(threads)[tid].sh_ecx;
+      VG_(baseBlock)[VGOFF_(sh_edx)] = VG_(threads)[tid].sh_edx;
+      VG_(baseBlock)[VGOFF_(sh_esi)] = VG_(threads)[tid].sh_esi;
+      VG_(baseBlock)[VGOFF_(sh_edi)] = VG_(threads)[tid].sh_edi;
+      VG_(baseBlock)[VGOFF_(sh_ebp)] = VG_(threads)[tid].sh_ebp;
+      VG_(baseBlock)[VGOFF_(sh_esp)] = VG_(threads)[tid].sh_esp;
+      VG_(baseBlock)[VGOFF_(sh_eflags)] = VG_(threads)[tid].sh_eflags;
+   } else {
+      /* Fields shouldn't be used -- check their values haven't changed. */
+      /* Nb: they are written to by some macros like SET_EDX, but they
+       *     should just write VG_UNUSED_SHADOW_REG_VALUE. */
+      vg_assert(
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eax &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ecx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edx &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_edi &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_ebp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_esp &&
+         VG_UNUSED_SHADOW_REG_VALUE == VG_(threads)[tid].sh_eflags);
+   }
 
    vg_tid_currently_in_baseBlock = tid;
 }
@@ -432,15 +470,28 @@
    for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++)
       VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i];
 
-   VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
-   VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
-   VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
-   VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
-   VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
-   VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
-   VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
-   VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
-   VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   if (VG_(needs).shadow_regs) {
+      VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)];
+      VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)];
+      VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)];
+      VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)];
+      VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)];
+      VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)];
+      VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)];
+      VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)];
+      VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)];
+   } else {
+      /* Fill with recognisable junk */
+      VG_(threads)[tid].sh_eax =
+      VG_(threads)[tid].sh_ebx =
+      VG_(threads)[tid].sh_ecx =
+      VG_(threads)[tid].sh_edx =
+      VG_(threads)[tid].sh_esi =
+      VG_(threads)[tid].sh_edi =
+      VG_(threads)[tid].sh_ebp =
+      VG_(threads)[tid].sh_esp = 
+      VG_(threads)[tid].sh_eflags = VG_UNUSED_SHADOW_REG_VALUE;
+   }
 
    /* Fill it up with junk. */
    VG_(baseBlock)[VGOFF_(m_eax)] = junk;
@@ -491,7 +542,7 @@
    vg_assert(!VG_(scheduler_jmpbuf_valid));
 
    VG_(save_thread_state) ( tid );
-   VGP_POPCC;
+   VGP_POPCC(VgpRun);
    return trc;
 }
 
@@ -566,14 +617,18 @@
 
    if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1)
        || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2) 
-       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)) {
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)
+       || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_4)) {
       /* Jolly good! */
    } else {
-      VG_(printf)("%%esp at startup = %p is not near %p, %p or %p; aborting\n", 
-                  (void*)startup_esp, 
-                  (void*)VG_STARTUP_STACK_BASE_1,
-                  (void*)VG_STARTUP_STACK_BASE_2,
-                  (void*)VG_STARTUP_STACK_BASE_3 );
+      VG_(printf)(
+         "%%esp at startup = %p is not near %p, %p, %p or %p; aborting\n", 
+         (void*)startup_esp, 
+         (void*)VG_STARTUP_STACK_BASE_1,
+         (void*)VG_STARTUP_STACK_BASE_2,
+         (void*)VG_STARTUP_STACK_BASE_3,
+         (void*)VG_STARTUP_STACK_BASE_4 
+      );
       VG_(panic)("unexpected %esp at startup");
    }
 
@@ -751,11 +806,12 @@
 static
 void sched_do_syscall ( ThreadId tid )
 {
-   UInt saved_eax;
-   UInt res, syscall_no;
-   UInt fd;
-   Bool orig_fd_blockness;
-   Char msg_buf[100];
+   UInt  saved_eax;
+   UInt  res, syscall_no;
+   UInt  fd;
+   void* pre_res;
+   Bool  orig_fd_blockness;
+   Char  msg_buf[100];
 
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
@@ -797,12 +853,13 @@
 
    /* Deal with error case immediately. */
    if (!fd_is_valid(fd)) {
-      VG_(message)(Vg_UserMsg, 
-         "Warning: invalid file descriptor %d in syscall %s",
-         fd, syscall_no == __NR_read ? "read()" : "write()" );
-      VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+      if (VG_(needs).core_errors)
+         VG_(message)(Vg_UserMsg, 
+            "Warning: invalid file descriptor %d in syscall %s",
+            fd, syscall_no == __NR_read ? "read()" : "write()" );
+      pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
       KERNEL_DO_SYSCALL(tid, res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
       return;
@@ -813,7 +870,7 @@
    orig_fd_blockness = fd_is_blockful(fd);
    set_fd_nonblocking(fd);
    vg_assert(!fd_is_blockful(fd));
-   VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */);
+   pre_res = VG_(pre_known_blocking_syscall)(tid, syscall_no);
 
    /* This trashes the thread's %eax; we have to preserve it. */
    saved_eax = VG_(threads)[tid].m_eax;
@@ -834,7 +891,7 @@
              the I/O completion -- the client is.  So don't file a 
              completion-wait entry. 
       */
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
       /* We're still runnable. */
       vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
@@ -848,7 +905,8 @@
       /* Put this fd in a table of fds on which we are waiting for
          completion. The arguments for select() later are constructed
          from this table.  */
-      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */);
+      add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */,
+                     pre_res);
       /* Deschedule thread until an I/O completion happens. */
       VG_(threads)[tid].status = VgTs_WaitFD;
       if (VG_(clo_trace_sched)) {
@@ -1042,6 +1100,7 @@
 void complete_blocked_syscalls ( void )
 {
    Int      fd, i, res, syscall_no;
+   void*    pre_res;
    ThreadId tid;
    Char     msg_buf[100];
 
@@ -1069,6 +1128,8 @@
       syscall_no = vg_waiting_fds[i].syscall_no;
       vg_assert(syscall_no == VG_(threads)[tid].m_eax);
 
+      pre_res = vg_waiting_fds[i].pre_result;
+
       /* In a rare case pertaining to writing into a pipe, write()
          will block when asked to write > 4096 bytes even though the
          kernel claims, when asked via select(), that blocking will
@@ -1086,7 +1147,7 @@
       }
 
       KERNEL_DO_SYSCALL(tid,res);
-      VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */);
+      VG_(post_known_blocking_syscall)(tid, syscall_no, pre_res, res);
 
       /* Reschedule. */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1125,7 +1186,7 @@
    struct vki_timespec req;
    struct vki_timespec rem;
    req.tv_sec = 0;
-   req.tv_nsec = 20 * 1000 * 1000;
+   req.tv_nsec = 10 * 1000 * 1000;
    res = VG_(nanosleep)( &req, &rem );   
    vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */);
 }
@@ -1266,6 +1327,8 @@
       if (0)
          VG_(printf)("SCHED: tid %d\n", tid);
 
+      VG_TRACK( thread_run, tid );
+
       /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
          that it decrements the counter before testing it for zero, so
          that if VG_(dispatch_ctr) is set to N you get at most N-1
@@ -1326,7 +1389,8 @@
                = VG_(search_transtab) ( VG_(threads)[tid].m_eip );
             if (trans_addr == (Addr)0) {
                /* Not found; we need to request a translation. */
-               create_translation_for( tid, VG_(threads)[tid].m_eip ); 
+               create_translation_for( 
+                  tid, VG_(threads)[tid].m_eip ); 
                trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip ); 
                if (trans_addr == (Addr)0)
                   VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry");
@@ -1382,8 +1446,13 @@
                If not valgrinding (cachegrinding, etc) don't do this.
                __libc_freeres does some invalid frees which crash
                the unprotected malloc/free system. */
+
+            /* If __NR_exit, remember the supplied argument. */
+            if (VG_(threads)[tid].m_eax == __NR_exit)
+               VG_(exitcode) = VG_(threads)[tid].m_ebx; /* syscall arg1 */
+
             if (VG_(threads)[tid].m_eax == __NR_exit 
-                && !VG_(clo_instrument)) {
+                && ! VG_(needs).run_libc_freeres) {
                if (VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; quitting");
@@ -1392,7 +1461,7 @@
             }
 
             if (VG_(threads)[tid].m_eax == __NR_exit) {
-               vg_assert(VG_(clo_instrument));
+               vg_assert(VG_(needs).run_libc_freeres);
                if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                   VG_(message)(Vg_DebugMsg, 
                      "Caught __NR_exit; running __libc_freeres()");
@@ -1574,10 +1643,10 @@
    vki_ksigset_t irrelevant_sigmask;
    vg_assert(VG_(is_valid_or_empty_tid)(tid));
    vg_assert(VG_(threads)[tid].status == VgTs_Empty);
-   /* Mark its stack no-access */
-   if (VG_(clo_instrument) && tid != 1)
-      VGM_(make_noaccess)( VG_(threads)[tid].stack_base,
-                           VG_(threads)[tid].stack_size );
+   /* Its stack is now off-limits */
+   VG_TRACK( die_mem_stack, VG_(threads)[tid].stack_base,
+                            VG_(threads)[tid].stack_size );
+
    /* Forget about any pending signals directed specifically at this
       thread, and get rid of signal handlers specifically arranged for
       this thread. */
@@ -1620,17 +1689,14 @@
       thread_return = VG_(threads)[jnr].joiner_thread_return;
       if (thread_return != NULL) {
          /* CHECK thread_return writable */
-         if (VG_(clo_instrument)
-             && !VGM_(check_writable)( (Addr)thread_return, 
-                                       sizeof(void*), NULL))
-            VG_(record_pthread_err)( jnr, 
-               "pthread_join: thread_return points to invalid location");
+         VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[jnr],
+                                  "pthread_join: thread_return",
+                                  (Addr)thread_return, sizeof(void*));
 
          *thread_return = VG_(threads)[jee].joinee_retval;
          /* Not really right, since it makes the thread's return value
             appear to be defined even if it isn't. */
-         if (VG_(clo_instrument))
-            VGM_(make_readable)( (Addr)thread_return, sizeof(void*) );
+         VG_TRACK( post_mem_write, (Addr)thread_return, sizeof(void*) );
       }
 
       /* Joinee is discarded */
@@ -1716,8 +1782,8 @@
    }
    sp--;
    *cu = VG_(threads)[tid].custack[sp];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   // JJJ: no corresponding pre_mem_write check??
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    VG_(threads)[tid].custack_used = sp;
    SET_EDX(tid, 0);
 }
@@ -1884,7 +1950,7 @@
             "set_cancelpend for invalid tid %d", cee);
          print_sched_event(tid, msg_buf);
       }
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cancel: target thread does not exist, or invalid");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -1919,7 +1985,7 @@
    vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
 
    if (jee == tid) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: attempt to join to self");
       SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1935,7 +2001,7 @@
        || jee >= VG_N_THREADS
        || VG_(threads)[jee].status == VgTs_Empty) {
       /* Invalid thread to join to. */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_join: target thread does not exist, or invalid");
       SET_EDX(tid, EINVAL);
       VG_(threads)[tid].status = VgTs_Runnable;
@@ -1948,7 +2014,7 @@
       if (VG_(threads)[i].status == VgTs_WaitJoinee
           && VG_(threads)[i].joiner_jee_tid == jee) {
          /* Someone already did join on this thread */
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_join: another thread already "
             "in join-wait for target thread");
          SET_EDX(tid, EINVAL);
@@ -2074,33 +2140,39 @@
                      - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4  ??? */;
    }
 
-   VG_(threads)[tid].m_esp 
-      = VG_(threads)[tid].stack_base 
-        + VG_(threads)[tid].stack_size
-        - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
+   /* Having got memory to hold the thread's stack:
+      - set %esp as base + size
+      - mark everything below %esp inaccessible
+      - mark redzone at stack end inaccessible
+    */
+   VG_(threads)[tid].m_esp = VG_(threads)[tid].stack_base 
+                           + VG_(threads)[tid].stack_size
+                           - VG_AR_CLIENT_STACKBASE_REDZONE_SZB;
 
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( VG_(threads)[tid].m_esp, 
-                           VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
+   VG_TRACK ( die_mem_stack, VG_(threads)[tid].stack_base, 
+                           + new_stk_szb - VG_AR_CLIENT_STACKBASE_REDZONE_SZB);
+   VG_TRACK ( ban_mem_stack, VG_(threads)[tid].m_esp, 
+                             VG_AR_CLIENT_STACKBASE_REDZONE_SZB );
    
-   /* push arg */
-   VG_(threads)[tid].m_esp -= 4;
-   * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg;
-
-   /* push (bogus) return address */
-   VG_(threads)[tid].m_esp -= 4;
+   /* push two args */
+   VG_(threads)[tid].m_esp -= 8;
+   VG_TRACK ( new_mem_stack, (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( pre_mem_write, Vg_CorePThread, & VG_(threads)[tid], 
+                             "new thread: stack",
+                             (Addr)VG_(threads)[tid].m_esp, 2 * 4 );
+ 
+   /* push arg and (bogus) return address */
+   * (UInt*)(VG_(threads)[tid].m_esp+4) = (UInt)arg;
    * (UInt*)(VG_(threads)[tid].m_esp) 
       = (UInt)&do__apply_in_new_thread_bogusRA;
 
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 );
+   VG_TRACK ( post_mem_write, VG_(threads)[tid].m_esp, 2 * 4 );
 
    /* this is where we start */
    VG_(threads)[tid].m_eip = (UInt)fn;
 
    if (VG_(clo_trace_sched)) {
-      VG_(sprintf)(msg_buf,
-         "new thread, created by %d", parent_tid );
+      VG_(sprintf)(msg_buf, "new thread, created by %d", parent_tid );
       print_sched_event(tid, msg_buf);
    }
 
@@ -2230,7 +2302,7 @@
 
    /* POSIX doesn't mandate this, but for sanity ... */
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_lock/trylock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2250,7 +2322,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_lock/trylock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2304,6 +2376,9 @@
       /* We get it! [for the first time]. */
       mutex->__m_count = 1;
       mutex->__m_owner = (_pthread_descr)tid;
+
+      VG_TRACK( post_mutex_lock, tid, mutex);
+
       /* return 0 (success). */
       SET_EDX(tid, 0);
    }
@@ -2327,7 +2402,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2347,7 +2422,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_mutex_unlock: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2356,7 +2431,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0) {
       /* nobody holds it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is not locked");
       SET_EDX(tid, EPERM);
       return;
@@ -2364,7 +2439,7 @@
 
    if ((ThreadId)mutex->__m_owner != tid) {
       /* we don't hold it */
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_mutex_unlock: mutex is locked by a different thread");
       SET_EDX(tid, EPERM);
       return;
@@ -2384,6 +2459,8 @@
    vg_assert(mutex->__m_count == 1);
    vg_assert((ThreadId)mutex->__m_owner == tid);
 
+   VG_TRACK( post_mutex_unlock, tid, mutex);
+
    /* Release at max one thread waiting on this mutex. */
    release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" );
 
@@ -2561,7 +2638,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (mutex == NULL || cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_wait/timedwait: cond or mutex is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2581,7 +2658,7 @@
          if (mutex->__m_count >= 0) break;
          /* else fall thru */
       default:
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is invalid");
          SET_EDX(tid, EINVAL);
          return;
@@ -2590,7 +2667,7 @@
    /* Barf if we don't currently hold the mutex. */
    if (mutex->__m_count == 0 /* nobody holds it */
        || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) {
-         VG_(record_pthread_err)( tid, 
+         VG_(record_pthread_error)( tid, 
             "pthread_cond_wait/timedwait: mutex is unlocked "
             "or is locked but not owned by thread");
       SET_EDX(tid, EINVAL);
@@ -2636,7 +2713,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (cond == NULL) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_cond_signal/broadcast: cond is NULL");
       SET_EDX(tid, EINVAL);
       return;
@@ -2699,14 +2776,11 @@
    vg_thread_keys[i].destructor = destructor;
 
    /* check key for addressibility */
-   if (VG_(clo_instrument)
-       && !VGM_(check_writable)( (Addr)key, 
-                                 sizeof(pthread_key_t), NULL))
-      VG_(record_pthread_err)( tid, 
-         "pthread_key_create: key points to invalid location");
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid], 
+                            "pthread_key_create: key",
+                            (Addr)key, sizeof(pthread_key_t));
    *key = i;
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) );
+   VG_TRACK( post_mem_write, (Addr)key, sizeof(pthread_key_t) );
 
    SET_EDX(tid, 0);
 }
@@ -2726,7 +2800,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
    
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_key_delete: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2760,7 +2834,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_getspecific: key is invalid");
       SET_EDX(tid, (UInt)NULL);
       return;
@@ -2786,7 +2860,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!is_valid_key(key)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_setspecific: key is invalid");
       SET_EDX(tid, EINVAL);
       return;
@@ -2814,14 +2888,16 @@
    }
    vg_assert(VG_(is_valid_tid)(tid));
    vg_assert(key >= 0 && key < VG_N_THREAD_KEYS);
+
+   // JJJ: no pre_mem_write check??
+   
    if (!vg_thread_keys[key].inuse) {
       SET_EDX(tid, -1);
       return;
    }
    cu->fn = vg_thread_keys[key].destructor;
    cu->arg = VG_(threads)[tid].specifics[key];
-   if (VG_(clo_instrument))
-      VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) );
+   VG_TRACK( post_mem_write, (Addr)cu, sizeof(CleanupEntry) );
    SET_EDX(tid, 0);
 }
 
@@ -2852,27 +2928,19 @@
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
 
-   if (VG_(clo_instrument)) {
-      /* check newmask/oldmask are addressible/defined */
-      if (newmask
-          && !VGM_(check_readable)( (Addr)newmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: newmask contains "
-            "unaddressible or undefined bytes");
-      if (oldmask
-          && !VGM_(check_writable)( (Addr)oldmask, 
-                                    sizeof(vki_ksigset_t), NULL))
-         VG_(record_pthread_err)( tid, 
-            "pthread_sigmask: oldmask contains "
-            "unaddressible bytes");
-   }
+   if (newmask)
+      VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                              "pthread_sigmask: newmask",
+                              (Addr)newmask, sizeof(vki_ksigset_t));
+   if (oldmask)
+      VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                               "pthread_sigmask: oldmask",
+                               (Addr)oldmask, sizeof(vki_ksigset_t));
 
    VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask );
 
-   if (oldmask && VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)oldmask, sizeof(vki_ksigset_t) );
-   }
+   if (oldmask)
+      VG_TRACK( post_mem_write, (Addr)oldmask, sizeof(vki_ksigset_t) );
 
    /* Success. */
    SET_EDX(tid, 0);
@@ -2924,7 +2992,7 @@
              && VG_(threads)[tid].status == VgTs_Runnable);
 
    if (!VG_(is_valid_tid)(thread)) {
-      VG_(record_pthread_err)( tid, 
+      VG_(record_pthread_error)( tid, 
          "pthread_kill: invalid target thread");
       SET_EDX(tid, -VKI_ESRCH);
       return;
@@ -2994,18 +3062,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_read, Vg_CorePThread, &VG_(threads)[tid],
+                           "pthread_atfork: prepare/parent/child",
+                           (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_readable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "pthread_atfork: prepare/parent/child contains "
-            "unaddressible or undefined bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3027,18 +3088,11 @@
 
    vg_assert(VG_(is_valid_tid)(tid) 
              && VG_(threads)[tid].status == VgTs_Runnable);
+   VG_TRACK( pre_mem_write, Vg_CorePThread, &VG_(threads)[tid],
+                            "fork: prepare/parent/child",
+                            (Addr)fh, sizeof(ForkHandlerEntry));
 
-   if (VG_(clo_instrument)) {
-      /* check fh is addressible/defined */
-      if (!VGM_(check_writable)( (Addr)fh,
-                                 sizeof(ForkHandlerEntry), NULL)) {
-         VG_(record_pthread_err)( tid, 
-            "fork: prepare/parent/child contains "
-            "unaddressible bytes");
-      }
-   }
-
-   if (n < 0 && n >= VG_N_FORKHANDLERSTACK) {
+   if (n < 0 || n >= VG_N_FORKHANDLERSTACK) {
       SET_EDX(tid, -1);
       return;
    } 
@@ -3046,9 +3100,7 @@
    *fh = vg_fhstack[n];
    SET_EDX(tid, 0);
 
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) );
-   }
+   VG_TRACK( post_mem_write, (Addr)fh, sizeof(ForkHandlerEntry) );
 }
 
 
@@ -3063,9 +3115,9 @@
 static
 void do_client_request ( ThreadId tid )
 {
-#  define RETURN_WITH(vvv)                        \
-       { tst->m_edx = (vvv);                      \
-         tst->sh_edx = VGM_WORD_VALID;            \
+#  define RETURN_WITH(vvv)                      \
+       { tst->m_edx = (vvv);                    \
+         tst->sh_edx = VG_(written_shadow_reg); \
        }
 
    ThreadState* tst    = &VG_(threads)[tid];
@@ -3289,7 +3341,7 @@
          break;
 
       case VG_USERREQ__PTHREAD_ERROR:
-         VG_(record_pthread_err)( tid, (Char*)(arg[1]) );
+         VG_(record_pthread_error)( tid, (Char*)(arg[1]) );
          SET_EDX(tid, 0);
          break;
 
@@ -3311,30 +3363,40 @@
                                      (ForkHandlerEntry*)(arg[2]) );
          break;
 
-      case VG_USERREQ__MAKE_NOACCESS:
-      case VG_USERREQ__MAKE_WRITABLE:
-      case VG_USERREQ__MAKE_READABLE:
-      case VG_USERREQ__DISCARD:
-      case VG_USERREQ__CHECK_WRITABLE:
-      case VG_USERREQ__CHECK_READABLE:
-      case VG_USERREQ__MAKE_NOACCESS_STACK:
-      case VG_USERREQ__DO_LEAK_CHECK:
-      case VG_USERREQ__DISCARD_TRANSLATIONS:
-         SET_EDX(
-            tid, 
-            VG_(handle_client_request) ( &VG_(threads)[tid], arg )
-         );
-	 break;
-
       case VG_USERREQ__SIGNAL_RETURNS: 
          handle_signal_return(tid);
 	 break;
 
+      /* Requests from the client program */
+
+      case VG_USERREQ__DISCARD_TRANSLATIONS:
+         if (VG_(clo_verbosity) > 2)
+            VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
+                         " addr %p,  len %d\n",
+                         (void*)arg[1], arg[2] );
+
+         VG_(invalidate_translations)( arg[1], arg[2] );
+
+         SET_EDX( tid, 0 );     /* return value is meaningless */
+	 break;
+
       default:
-         VG_(printf)("panic'd on client request = 0x%x\n", arg[0] );
-         VG_(panic)("do_client_request: "
-                    "unknown request");
-         /*NOTREACHED*/
+         if (VG_(needs).client_requests) {
+            if (VG_(clo_verbosity) > 2)
+               VG_(printf)("client request: code %d,  addr %p,  len %d\n",
+                           arg[0], (void*)arg[1], arg[2] );
+
+            SET_EDX(tid,
+                    SK_(handle_client_request) ( &VG_(threads)[tid], arg )
+            );
+         } else {
+            VG_(printf)("\nError:\n"
+                        "  unhandled client request: 0x%x.  Perhaps\n" 
+                        "  VG_(needs).client_requests should be set?\n",
+                        arg[0]);
+            VG_(panic)("do_client_request: unknown request");
+            /*NOTREACHED*/
+         }
          break;
    }
 
@@ -3392,7 +3454,7 @@
              && stack_used 
                 >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) {
             VG_(message)(Vg_UserMsg,
-               "Warning: STACK OVERFLOW: "
+               "Error: STACK OVERFLOW: "
                "thread %d: stack used %d, available %d", 
                i, stack_used, VG_PTHREAD_STACK_MIN );
             VG_(message)(Vg_UserMsg,
diff --git a/vg_signals.c b/vg_signals.c
index f58ec11..f849544 100644
--- a/vg_signals.c
+++ b/vg_signals.c
@@ -26,12 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
 #include "vg_include.h"
-#include "vg_constants.h"
 #include "vg_unsafe.h"
 #include "valgrind.h"  /* for VALGRIND_MAGIC_SEQUENCE */
 
@@ -598,16 +597,18 @@
    return;
 
   bad_signo:
-   VG_(message)(Vg_UserMsg,
-                "Warning: bad signal number %d in __NR_sigaction.", 
-                signo);
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: bad signal number %d in __NR_sigaction.", 
+                   signo);
    SET_EAX(tid, -VKI_EINVAL);
    return;
 
   bad_sigkill_or_sigstop:
-   VG_(message)(Vg_UserMsg,
-      "Warning: attempt to set %s handler in __NR_sigaction.", 
-      signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
+   if (VG_(needs).core_errors)
+      VG_(message)(Vg_UserMsg,
+         "Warning: attempt to set %s handler in __NR_sigaction.", 
+         signo == VKI_SIGKILL ? "SIGKILL" : "SIGSTOP" );
 
    SET_EAX(tid, -VKI_EINVAL);
    return;
@@ -939,11 +940,19 @@
    esp = esp_top_of_frame;
    esp -= sizeof(VgSigFrame);
    frame = (VgSigFrame*)esp;
+
+   /* For tracking memory events, indicate the entire frame has been
+    * allocated, but pretend that only the first four words are written */
+   VG_TRACK( new_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
+
    /* Assert that the frame is placed correctly. */
    vg_assert( (sizeof(VgSigFrame) & 0x3) == 0 );
    vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) 
               == ((Char*)(esp_top_of_frame)) );
 
+   /* retaddr, sigNo, psigInfo, puContext fields are to be written */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst, "signal handler frame", 
+                            (Addr)esp, 16 );
    frame->retaddr    = (UInt)(&VG_(signalreturn_bogusRA));
    frame->sigNo      = sigNo;
    frame->psigInfo   = (Addr)NULL;
@@ -974,14 +983,9 @@
    /* This thread needs to be marked runnable, but we leave that the
       caller to do. */
 
-   /* Make retaddr, sigNo, psigInfo, puContext fields readable -- at
-      0(%ESP) .. 12(%ESP) */
-   if (VG_(clo_instrument)) {
-      VGM_(make_readable) ( ((Addr)esp)+0,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+4,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+8,  4 );
-      VGM_(make_readable) ( ((Addr)esp)+12, 4 );
-   }
+   /* retaddr, sigNo, psigInfo, puContext fields have been written -- 
+      at 0(%ESP) .. 12(%ESP) */
+   VG_TRACK( post_mem_write, (Addr)esp, 16 );
 
    /* 
    VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", 
@@ -1021,8 +1025,7 @@
       tst->m_fpu[i] = frame->fpustate[i];
 
    /* Mark the frame structure as nonaccessible. */
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess)( (Addr)frame, sizeof(VgSigFrame) );
+   VG_TRACK( die_mem_stack_signal, (Addr)frame, sizeof(VgSigFrame) );
 
    /* Restore machine state from the saved context. */
    tst->m_eax     = frame->eax;
@@ -1140,9 +1143,7 @@
          sigwait_args = (UInt*)(tst->m_eax);
          if (NULL != (UInt*)(sigwait_args[2])) {
             *(Int*)(sigwait_args[2]) = sigNo;
-            if (VG_(clo_instrument))
-               VGM_(make_readable)( (Addr)(sigwait_args[2]), 
-                                    sizeof(UInt));
+            VG_TRACK( post_mem_write, (Addr)sigwait_args[2], sizeof(UInt));
          }
 	 SET_EDX(tid, 0);
          tst->status = VgTs_Runnable;
@@ -1194,7 +1195,11 @@
             vg_dcss.dcss_sigpending[sigNo] = False;
             vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID;
             continue; /* for (sigNo = 1; ...) loop */
-	 }
+	 } else if (VG_(ksigismember)(&(tst->sig_mask), sigNo)) {
+            /* signal blocked in specific thread, so we can't
+               deliver it just now */
+            continue; /* for (sigNo = 1; ...) loop */
+         }
       } else {
          /* not directed to a specific thread, so search for a
             suitable candidate */
diff --git a/vg_skin.h b/vg_skin.h
new file mode 100644
index 0000000..2ecd3ae
--- /dev/null
+++ b/vg_skin.h
@@ -0,0 +1,1397 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The only header your skin will ever need to #include...      ---*/
+/*---                                                    vg_skin.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VG_SKIN_H
+#define __VG_SKIN_H
+
+#include <stdarg.h>       /* ANSI varargs stuff  */
+#include <setjmp.h>       /* for jmp_buf         */
+
+#include "vg_constants_skin.h"
+
+
+/*====================================================================*/
+/*=== Build options and table sizes.                               ===*/
+/*====================================================================*/
+
+/* You should be able to change these options or sizes, recompile, and 
+   still have a working system. */
+
+/* The maximum number of pthreads that we support.  This is
+   deliberately not very high since our implementation of some of the
+   scheduler algorithms is surely O(N) in the number of threads, since
+   that's simple, at least.  And (in practice) we hope that most
+   programs do not need many threads. */
+#define VG_N_THREADS 50
+
+/* Maximum number of pthread keys available.  Again, we start low until
+   the need for a higher number presents itself. */
+#define VG_N_THREAD_KEYS 50
+
+/* Total number of integer registers available for allocation -- all of
+   them except %esp, %ebp.  %ebp permanently points at VG_(baseBlock).
+   
+   If you change this you'll have to also change at least these:
+     - VG_(rankToRealRegNum)()
+     - VG_(realRegNumToRank)()
+     - ppRegsLiveness()
+     - the RegsLive type (maybe -- RegsLive type must have more than
+                          VG_MAX_REALREGS bits)
+   
+   Do not change this unless you really know what you are doing!  */
+#define VG_MAX_REALREGS 6
+
+
+/*====================================================================*/
+/*=== Basic types                                                  ===*/
+/*====================================================================*/
+
+#define mycat_wrk(aaa,bbb) aaa##bbb
+#define mycat(aaa,bbb) mycat_wrk(aaa,bbb)
+
+typedef unsigned char          UChar;
+typedef unsigned short         UShort;
+typedef unsigned int           UInt;
+typedef unsigned long long int ULong;
+
+typedef signed char            Char;
+typedef signed short           Short;
+typedef signed int             Int;
+typedef signed long long int   Long;
+
+typedef unsigned int           Addr;
+
+typedef unsigned char          Bool;
+#define False                  ((Bool)0)
+#define True                   ((Bool)1)
+
+
+/* ---------------------------------------------------------------------
+   Now the basic types are set up, we can haul in the kernel-interface
+   definitions.
+   ------------------------------------------------------------------ */
+
+#include "./vg_kerneliface.h"
+
+
+/*====================================================================*/
+/*=== Command-line options                                         ===*/
+/*====================================================================*/
+
+/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */
+extern Int   VG_(clo_verbosity);
+
+/* Profile? */
+extern Bool  VG_(clo_profile);
+
+
+/* Call this if a recognised option was bad for some reason.
+   Note: don't use it just because an option was unrecognised -- return 'False'
+   from SKN_(process_cmd_line_option) to indicate that. */
+extern void VG_(bad_option) ( Char* opt );
+
+/* Client args */
+extern Int    VG_(client_argc);
+extern Char** VG_(client_argv);
+
+/* Client environment.  Can be inspected with VG_(getenv)() (below) */
+extern Char** VG_(client_envp);
+
+
+/*====================================================================*/
+/*=== Printing messages for the user                               ===*/
+/*====================================================================*/
+
+/* Print a message prefixed by "??<pid>?? "; '?' depends on the VgMsgKind.
+   Should be used for all user output. */
+
+typedef
+   enum { Vg_UserMsg,         /* '?' == '=' */
+          Vg_DebugMsg,        /* '?' == '-' */
+          Vg_DebugExtraMsg    /* '?' == '+' */
+   }
+   VgMsgKind;
+
+/* Functions for building a message from multiple parts. */
+extern void VG_(start_msg)  ( VgMsgKind kind );
+extern void VG_(add_to_msg) ( Char* format, ... );
+/* Ends and prints the message.  Appends a newline. */
+extern void VG_(end_msg)    ( void );
+
+/* Send a simple, single-part message.  Appends a newline. */
+extern void VG_(message)    ( VgMsgKind kind, Char* format, ... );
+
+
+/*====================================================================*/
+/*=== Profiling                                                    ===*/
+/*====================================================================*/
+
+/* Nb: VGP_(register_profile_event)() relies on VgpUnc being the first one */
+#define VGP_CORE_LIST \
+   /* These ones depend on the core */                \
+   VGP_PAIR(VgpUnc,         "unclassified"),          \
+   VGP_PAIR(VgpRun,         "running"),               \
+   VGP_PAIR(VgpSched,       "scheduler"),             \
+   VGP_PAIR(VgpMalloc,      "low-lev malloc/free"),   \
+   VGP_PAIR(VgpCliMalloc,   "client  malloc/free"),   \
+   VGP_PAIR(VgpStack,       "adjust-stack"),          \
+   VGP_PAIR(VgpTranslate,   "translate-main"),        \
+   VGP_PAIR(VgpToUCode,     "to-ucode"),              \
+   VGP_PAIR(VgpFromUcode,   "from-ucode"),            \
+   VGP_PAIR(VgpImprove,     "improve"),               \
+   VGP_PAIR(VgpRegAlloc,    "reg-alloc"),             \
+   VGP_PAIR(VgpLiveness,    "liveness-analysis"),     \
+   VGP_PAIR(VgpDoLRU,       "do-lru"),                \
+   VGP_PAIR(VgpSlowFindT,   "slow-search-transtab"),  \
+   VGP_PAIR(VgpInitMem,     "init-memory"),           \
+   VGP_PAIR(VgpExeContext,  "exe-context"),           \
+   VGP_PAIR(VgpReadSyms,    "read-syms"),             \
+   VGP_PAIR(VgpSearchSyms,  "search-syms"),           \
+   VGP_PAIR(VgpAddToT,      "add-to-transtab"),       \
+   VGP_PAIR(VgpCoreSysWrap, "core-syscall-wrapper"),  \
+   VGP_PAIR(VgpDemangle,    "demangle"),              \
+   /* These ones depend on the skin */                \
+   VGP_PAIR(VgpPreCloInit,  "pre-clo-init"),          \
+   VGP_PAIR(VgpPostCloInit, "post-clo-init"),         \
+   VGP_PAIR(VgpInstrument,  "instrument"),            \
+   VGP_PAIR(VgpSkinSysWrap, "skin-syscall-wrapper"),  \
+   VGP_PAIR(VgpFini,        "fini")
+
+#define VGP_PAIR(n,name) n
+typedef enum { VGP_CORE_LIST } VgpCoreCC;
+#undef  VGP_PAIR
+
+/* When registering skin profiling events, ensure that the 'n' value is in
+ * the range (VgpFini+1..) */
+extern void VGP_(register_profile_event) ( Int n, Char* name );
+
+extern void VGP_(pushcc) ( UInt cc );
+extern void VGP_(popcc)  ( UInt cc );
+
+/* Define them only if they haven't already been defined by vg_profile.c */
+#ifndef VGP_PUSHCC
+#  define VGP_PUSHCC(x)
+#endif
+#ifndef VGP_POPCC
+#  define VGP_POPCC(x)
+#endif
+
+
+/*====================================================================*/
+/*=== Useful stuff to call from generated code                     ===*/
+/*====================================================================*/
+
+/* ------------------------------------------------------------------ */
+/* General stuff */
+
+/* Get the simulated %esp */
+extern Addr VG_(get_stack_pointer) ( void );
+
+/* Detect if an address is within Valgrind's stack */
+extern Bool VG_(within_stack)(Addr a);
+
+/* Detect if an address is in Valgrind's m_state_static */
+extern Bool VG_(within_m_state_static)(Addr a);
+
+/* Check if an address is 4-byte aligned */
+#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3))
+
+
+/* ------------------------------------------------------------------ */
+/* Thread-related stuff */
+
+/* Special magic value for an invalid ThreadId.  It corresponds to
+   LinuxThreads using zero as the initial value for
+   pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */
+#define VG_INVALID_THREADID ((ThreadId)(0))
+
+/* ThreadIds are simply indices into the vg_threads[] array. */
+typedef 
+   UInt 
+   ThreadId;
+
+typedef
+   struct _ThreadState
+   ThreadState;
+
+extern ThreadId     VG_(get_current_tid_1_if_root) ( void );
+extern ThreadState* VG_(get_ThreadState)           ( ThreadId tid );
+
+
+/*====================================================================*/
+/*=== Valgrind's version of libc                                   ===*/
+/*====================================================================*/
+
+/* Valgrind doesn't use libc at all, for good reasons (trust us).  So here
+   are its own versions of C library functions, but with VG_ prefixes.  Note
+   that the types of some are slightly different to the real ones.  Some
+   extra useful functions are provided too; descriptions of how they work
+   are given below. */
+
+#if !defined(NULL)
+#  define NULL ((void*)0)
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* stdio.h
+ *
+ * Note that they all output to the file descriptor given by the
+ * --logfile-fd=N argument, which defaults to 2 (stderr).  Hence no
+ * need for VG_(fprintf)().  
+ *
+ * Also note that VG_(printf)() and VG_(vprintf)()
+ */
+extern void VG_(printf)  ( const char *format, ... );
+/* too noisy ...  __attribute__ ((format (printf, 1, 2))) ; */
+extern void VG_(sprintf) ( Char* buf, Char *format, ... );
+extern void VG_(vprintf) ( void(*send)(Char), 
+                           const Char *format, va_list vargs );
+
+/* ------------------------------------------------------------------ */
+/* stdlib.h */
+
+extern void* VG_(malloc)         ( Int nbytes );
+extern void  VG_(free)           ( void* ptr );
+extern void* VG_(calloc)         ( Int nmemb, Int nbytes );
+extern void* VG_(realloc)        ( void* ptr, Int size );
+extern void* VG_(malloc_aligned) ( Int req_alignB, Int req_pszB );
+
+extern void  VG_(print_malloc_stats) ( void );
+
+
+extern void  VG_(exit)( Int status )
+             __attribute__ ((__noreturn__));
+/* Print a (panic) message (constant string) appending newline, and abort. */
+extern void  VG_(panic) ( Char* str )
+             __attribute__ ((__noreturn__));
+
+/* Looks up VG_(client_envp) (above) */
+extern Char* VG_(getenv) ( Char* name );
+
+/* Crude stand-in for the glibc system() call. */
+extern Int   VG_(system) ( Char* cmd );
+
+extern Long  VG_(atoll)   ( Char* str );
+
+/* Like atoll(), but converts a number of base 2..36 */
+extern Long  VG_(atoll36) ( UInt base, Char* str );
+
+
+/* ------------------------------------------------------------------ */
+/* ctype.h functions and related */
+extern Bool VG_(isspace) ( Char c );
+extern Bool VG_(isdigit) ( Char c );
+extern Char VG_(toupper) ( Char c );
+
+
+/* ------------------------------------------------------------------ */
+/* string.h */
+extern Int   VG_(strlen)         ( const Char* str );
+extern Char* VG_(strcat)         ( Char* dest, const Char* src );
+extern Char* VG_(strncat)        ( Char* dest, const Char* src, Int n );
+extern Char* VG_(strpbrk)        ( const Char* s, const Char* accept );
+extern Char* VG_(strcpy)         ( Char* dest, const Char* src );
+extern Char* VG_(strncpy)        ( Char* dest, const Char* src, Int ndest );
+extern Int   VG_(strcmp)         ( const Char* s1, const Char* s2 );
+extern Int   VG_(strncmp)        ( const Char* s1, const Char* s2, Int nmax );
+extern Char* VG_(strstr)         ( const Char* haystack, Char* needle );
+extern Char* VG_(strchr)         ( const Char* s, Char c );
+extern Char* VG_(strdup)         ( const Char* s);
+
+/* Like strcmp(),  but stops comparing at any whitespace. */
+extern Int   VG_(strcmp_ws)      ( const Char* s1, const Char* s2 );
+
+/* Like strncmp(), but stops comparing at any whitespace. */
+extern Int   VG_(strncmp_ws)     ( const Char* s1, const Char* s2, Int nmax );
+
+/* Like strncpy(), but if 'src' is longer than 'ndest' inserts a '\0' at the 
+   Nth character. */
+extern void  VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest );
+
+/* Mini-regexp function.  Searches for 'pat' in 'str'.  Supports
+ * meta-symbols '*' and '?'.  '\' escapes meta-symbols. */
+extern Bool  VG_(stringMatch)    ( Char* pat, Char* str );
+
+
+/* ------------------------------------------------------------------ */
+/* math.h */
+/* Returns the base-2 logarithm of its argument. */
+extern Int VG_(log2) ( Int x );
+
+
+/* ------------------------------------------------------------------ */
+/* unistd.h */
+extern Int   VG_(getpid) ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* assert.h */
+/* Asserts permanently enabled -- no turning off with NDEBUG.  Hurrah! */
+#define VG__STRING(__str)  #__str
+
+#define vg_assert(expr)                                               \
+  ((void) ((expr) ? 0 :						      \
+	   (VG_(assert_fail) (VG__STRING(expr),			      \
+			      __FILE__, __LINE__,                     \
+                              __PRETTY_FUNCTION__), 0)))
+
+extern void VG_(assert_fail) ( Char* expr, Char* file, 
+                               Int line, Char* fn )
+            __attribute__ ((__noreturn__));
+
+
+/* ------------------------------------------------------------------ */
+/* Reading and writing files. */
+
+/* As per the system calls */
+extern Int  VG_(open)  ( const Char* pathname, Int flags, Int mode );
+extern Int  VG_(read)  ( Int fd, void* buf, Int count);
+extern Int  VG_(write) ( Int fd, void* buf, Int count);
+extern void VG_(close) ( Int fd );
+
+extern Int  VG_(stat)  ( Char* file_name, struct vki_stat* buf );
+
+
+/* ------------------------------------------------------------------ */
+/* mmap and related functions ... */
+extern void* VG_(mmap)( void* start, UInt length, 
+                        UInt prot, UInt flags, UInt fd, UInt offset );
+extern Int  VG_(munmap)( void* start, Int length );
+
+/* Get memory by anonymous mmap. */
+extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
+
+
+/* ------------------------------------------------------------------ */
+/* signal.h.  
+  
+   Note that these use the vk_ (kernel) structure
+   definitions, which are different in places from those that glibc
+   defines -- hence the 'k' prefix.  Since we're operating right at the
+   kernel interface, glibc's view of the world is entirely irrelevant. */
+
+/* --- Signal set ops --- */
+extern Int  VG_(ksigfillset)( vki_ksigset_t* set );
+extern Int  VG_(ksigemptyset)( vki_ksigset_t* set );
+
+extern Bool VG_(kisfullsigset)( vki_ksigset_t* set );
+extern Bool VG_(kisemptysigset)( vki_ksigset_t* set );
+
+extern Int  VG_(ksigaddset)( vki_ksigset_t* set, Int signum );
+extern Int  VG_(ksigdelset)( vki_ksigset_t* set, Int signum );
+extern Int  VG_(ksigismember) ( vki_ksigset_t* set, Int signum );
+
+extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, 
+                                      vki_ksigset_t* src );
+extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, 
+                                      vki_ksigset_t* src );
+
+/* --- Mess with the kernel's sig state --- */
+extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, 
+                                       vki_ksigset_t* oldset );
+extern Int VG_(ksigaction) ( Int signum,  
+                             const vki_ksigaction* act,  
+                             vki_ksigaction* oldact );
+
+extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int));
+
+extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss );
+
+extern Int VG_(kill)( Int pid, Int signo );
+extern Int VG_(sigpending) ( vki_ksigset_t* set );
+
+
+/*====================================================================*/
+/*=== UCode definition                                             ===*/
+/*====================================================================*/
+
+/* Tags which describe what operands are. */
+typedef
+   enum { TempReg=0, ArchReg=1, RealReg=2, 
+          SpillNo=3, Literal=4, Lit16=5, 
+          NoValue=6 }
+   Tag;
+
+/* Invalid register numbers :-) */
+#define INVALID_TEMPREG 999999999
+#define INVALID_REALREG 999999999
+
+/* Microinstruction opcodes. */
+typedef
+   enum {
+      NOP,
+      GET,
+      PUT,
+      LOAD,
+      STORE,
+      MOV,
+      CMOV, /* Used for cmpxchg and cmov */
+      WIDEN,
+      JMP,
+
+      /* Read/write the %EFLAGS register into a TempReg. */
+      GETF, PUTF,
+
+      ADD, ADC, AND, OR,  XOR, SUB, SBB,
+      SHL, SHR, SAR, ROL, ROR, RCL, RCR,
+      NOT, NEG, INC, DEC, BSWAP,
+      CC2VAL,
+
+      /* Not strictly needed, but useful for making better
+         translations of address calculations. */
+      LEA1,  /* reg2 := const + reg1 */
+      LEA2,  /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */
+
+      /* not for translating x86 calls -- only to call helpers */
+      CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences
+                           for CALLM. */
+      PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */
+      CALLM,  /* call to a machine-code helper */
+
+      /* For calling C functions of up to three arguments (or two if the
+         functions has a return value).  Arguments and return value must be
+         word-sized.  If you want to pass more arguments than this to a C
+         function you have to use global variables to fake it (eg. use
+         VG_(set_global_var)()).
+
+         Seven possibilities: 'arg1..3' show where args go, 'ret' shows
+         where return values go.
+        
+         CCALL(-,    -,    -   )    void f(void)
+         CCALL(arg1, -,    -   )    void f(UInt arg1)
+         CCALL(arg1, arg2, -   )    void f(UInt arg1, UInt arg2)
+         CCALL(arg1, arg2, arg3)    void f(UInt arg1, UInt arg2, UInt arg3)
+         CCALL(-,    -,    ret )    UInt f(UInt)
+         CCALL(arg1, -,    ret )    UInt f(UInt arg1)
+         CCALL(arg1, arg2, ret )    UInt f(UInt arg1, UInt arg2)
+       */
+      CCALL,
+
+      /* Hack for translating string (REP-) insns.  Jump to literal if
+         TempReg/RealReg is zero. */
+      JIFZ,
+
+      /* FPU ops which read/write mem or don't touch mem at all. */
+      FPU_R,
+      FPU_W,
+      FPU,
+
+      /* Advance the simulated %eip by some small (< 128) number. */
+      INCEIP,
+
+      /* Makes it easy for extended-UCode ops by doing:
+
+           enum { EU_OP1 = DUMMY_FINAL_OP + 1, ... } 
+   
+         WARNING: Do not add new opcodes after this one!  They can be added
+         before, though. */
+      DUMMY_FINAL_UOPCODE
+   }
+   Opcode;
+
+
+/* Condition codes, observing the Intel encoding.  CondAlways is an
+   extra. */
+typedef
+   enum {
+      CondO      = 0,  /* overflow           */
+      CondNO     = 1,  /* no overflow        */
+      CondB      = 2,  /* below              */
+      CondNB     = 3,  /* not below          */
+      CondZ      = 4,  /* zero               */
+      CondNZ     = 5,  /* not zero           */
+      CondBE     = 6,  /* below or equal     */
+      CondNBE    = 7,  /* not below or equal */
+      CondS      = 8,  /* negative           */
+      ConsNS     = 9,  /* not negative       */
+      CondP      = 10, /* parity even        */
+      CondNP     = 11, /* not parity even    */
+      CondL      = 12, /* jump less          */
+      CondNL     = 13, /* not less           */
+      CondLE     = 14, /* less or equal      */
+      CondNLE    = 15, /* not less or equal  */
+      CondAlways = 16  /* Jump always        */
+   } 
+   Condcode;
+
+
+/* Descriptions of additional properties of *unconditional* jumps. */
+typedef
+   enum {
+     JmpBoring=0,   /* boring unconditional jump */
+     JmpCall=1,     /* jump due to an x86 call insn */
+     JmpRet=2,      /* jump due to an x86 ret insn */
+     JmpSyscall=3,  /* do a system call, then jump */
+     JmpClientReq=4 /* do a client request, then jump */
+   }
+   JmpKind;
+
+
+/* Flags.  User-level code can only read/write O(verflow), S(ign),
+   Z(ero), A(ux-carry), C(arry), P(arity), and may also write
+   D(irection).  That's a total of 7 flags.  A FlagSet is a bitset,
+   thusly: 
+      76543210
+       DOSZACP
+   and bit 7 must always be zero since it is unused.
+*/
+typedef UChar FlagSet;
+
+#define FlagD (1<<6)
+#define FlagO (1<<5)
+#define FlagS (1<<4)
+#define FlagZ (1<<3)
+#define FlagA (1<<2)
+#define FlagC (1<<1)
+#define FlagP (1<<0)
+
+#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP)
+#define FlagsOSZAP  (FlagO | FlagS | FlagZ | FlagA |         FlagP)
+#define FlagsOSZCP  (FlagO | FlagS | FlagZ |         FlagC | FlagP)
+#define FlagsOSACP  (FlagO | FlagS |         FlagA | FlagC | FlagP)
+#define FlagsSZACP  (        FlagS | FlagZ | FlagA | FlagC | FlagP)
+#define FlagsSZAP   (        FlagS | FlagZ | FlagA |         FlagP)
+#define FlagsZCP    (                FlagZ         | FlagC | FlagP)
+#define FlagsOC     (FlagO |                         FlagC        )
+#define FlagsAC     (                        FlagA | FlagC        )
+
+#define FlagsALL    (FlagsOSZACP | FlagD)
+#define FlagsEmpty  (FlagSet)0
+
+
+/* Liveness of general purpose registers, useful for code generation.
+   Reg rank order 0..N-1 corresponds to bits 0..N-1, ie. first
+   reg's liveness in bit 0, last reg's in bit N-1.  Note that
+   these rankings don't match the Intel register ordering. */
+typedef UInt RRegSet;
+
+#define ALL_RREGS_DEAD      0                           /* 0000...00b */
+#define ALL_RREGS_LIVE      (1 << (VG_MAX_REALREGS-1))  /* 0011...11b */
+#define UNIT_RREGSET(rank)  (1 << (rank))
+
+#define IS_RREG_LIVE(rank,rregs_live) (rregs_live & UNIT_RREGSET(rank))
+#define SET_RREG_LIVENESS(rank,rregs_live,b)       \
+   do { RRegSet unit = UNIT_RREGSET(rank);         \
+        if (b) rregs_live |= unit;                 \
+        else   rregs_live &= ~unit;                \
+   } while(0)
+
+
+/* A Micro (u)-instruction. */
+typedef
+   struct {
+      /* word 1 */
+      UInt    lit32;      /* 32-bit literal */
+
+      /* word 2 */
+      UShort  val1;       /* first operand */
+      UShort  val2;       /* second operand */
+
+      /* word 3 */
+      UShort  val3;       /* third operand */
+      UChar   opcode;     /* opcode */
+      UChar   size;       /* data transfer size */
+
+      /* word 4 */
+      FlagSet flags_r;    /* :: FlagSet */
+      FlagSet flags_w;    /* :: FlagSet */
+      UChar   tag1:4;     /* first  operand tag */
+      UChar   tag2:4;     /* second operand tag */
+      UChar   tag3:4;     /* third  operand tag */
+      UChar   extra4b:4;  /* Spare field, used by WIDEN for src
+                             size, and by LEA2 for scale (1,2,4 or 8),
+                             and by JMPs for original x86 instr size */
+
+      /* word 5 */
+      UChar   cond;            /* condition, for jumps */
+      Bool    signed_widen:1;  /* signed or unsigned WIDEN ? */
+      JmpKind jmpkind:3;       /* additional properties of unconditional JMP */
+
+      /* Additional properties for UInstrs that call C functions:  
+           - CCALL
+           - PUT (when %ESP is the target)
+           - possibly skin-specific UInstrs
+      */
+      UChar   argc:2;          /* Number of args, max 3 */
+      UChar   regparms_n:2;    /* Number of args passed in registers */
+      Bool    has_ret_val:1;   /* Function has return value? */
+
+      /* RealReg liveness;  only sensical after reg alloc and liveness
+         analysis done.  This info is a little bit arch-specific --
+         VG_MAX_REALREGS can vary on different architectures.  Note that
+         to use this information requires converting between register ranks
+         and the Intel register numbers, using VG_(realRegNumToRank)()
+         and/or VG_(rankToRealRegNum)() */
+      RRegSet regs_live_after:VG_MAX_REALREGS; 
+   }
+   UInstr;
+
+
+/* Expandable arrays of uinstrs. */
+typedef 
+   struct { 
+      Int     used; 
+      Int     size; 
+      UInstr* instrs;
+      Int     nextTemp;
+   }
+   UCodeBlock;
+
+
+/*====================================================================*/
+/*=== Instrumenting UCode                                          ===*/
+/*====================================================================*/
+
+/* A structure for communicating TempReg and RealReg uses of UInstrs. */
+typedef
+   struct {
+      Int   num;
+      Bool  isWrite;
+   }
+   RegUse;
+
+/* Find what this instruction does to its regs.  Tag indicates whether we're
+ * considering TempRegs (pre-reg-alloc) or RealRegs (post-reg-alloc).
+ * Useful for analysis/optimisation passes. */
+extern Int  VG_(getRegUsage) ( UInstr* u, Tag tag, RegUse* arr );
+
+
+/* ------------------------------------------------------------------ */
+/* Used to register helper functions to be called from generated code */
+extern void VG_(register_compact_helper)    ( Addr a );
+extern void VG_(register_noncompact_helper) ( Addr a );
+
+
+/* ------------------------------------------------------------------ */
+/* Virtual register allocation */
+
+/* Get a new virtual register */
+extern Int   VG_(getNewTemp)     ( UCodeBlock* cb );
+
+/* Get a new virtual shadow register */
+extern Int   VG_(getNewShadow)   ( UCodeBlock* cb );
+
+/* Get a virtual register's corresponding virtual shadow register */
+#define SHADOW(tempreg)  ((tempreg)+1)
+
+
+/* ------------------------------------------------------------------ */
+/* Low-level UInstr builders */
+extern void VG_(newNOP)     ( UInstr* u );
+extern void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz );
+extern void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                               Tag tag1, UInt val1 );
+extern void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                              Tag tag1, UInt val1,
+                              Tag tag2, UInt val2 );
+extern void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
+                              Tag tag1, UInt val1,
+                              Tag tag2, UInt val2,
+                              Tag tag3, UInt val3 );
+extern void VG_(setFlagRW)  ( UInstr* u, 
+                               FlagSet fr, FlagSet fw );
+extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 );
+extern void VG_(setCCallFields)  ( UCodeBlock* cb, Addr fn, UChar argc,
+                                   UChar regparms_n, Bool has_ret_val );
+
+extern void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr );
+
+extern Bool VG_(anyFlagUse) ( UInstr* u );
+
+/* Refer to `the last instruction stuffed in' (can be lvalue). */
+#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1]
+
+
+/* ------------------------------------------------------------------ */
+/* Higher-level UInstr sequence builders */
+extern void VG_(callHelper_0_0) ( UCodeBlock* cb, Addr f);
+extern void VG_(callHelper_1_0) ( UCodeBlock* cb, Addr f, UInt arg1,
+                                  UInt regparms_n);
+extern void VG_(callHelper_2_0) ( UCodeBlock* cb, Addr f, UInt arg1, UInt arg2,
+                                  UInt regparms_n);
+
+/* One way around the 3-arg C function limit is to pass args via global
+ * variables... ugly, but it works. */
+void VG_(set_global_var) ( UCodeBlock* cb, Addr globvar_ptr, UInt val);
+
+/* ------------------------------------------------------------------ */
+/* UCode pretty/ugly printing, to help debugging skins;  but only useful
+   if VG_(needs).extended_UCode == True. */
+
+/* When True, all generated code is/should be printed. */
+extern Bool  VG_(print_codegen);
+
+extern void  VG_(ppUCodeBlock)     ( UCodeBlock* cb, Char* title );
+extern void  VG_(ppUInstr)         ( Int instrNo, UInstr* u );
+extern void  VG_(ppUInstrWithRegs) ( Int instrNo, UInstr* u );
+extern void  VG_(upUInstr)         ( Int instrNo, UInstr* u );
+extern Char* VG_(nameUOpcode)      ( Bool upper, Opcode opc );
+extern void  VG_(ppUOperand)       ( UInstr* u, Int operandNo, 
+                                     Int sz, Bool parens );
+
+/* ------------------------------------------------------------------ */
+/* Allocating/freeing basic blocks of UCode */
+extern UCodeBlock* VG_(allocCodeBlock) ( void );
+extern void  VG_(freeCodeBlock)        ( UCodeBlock* cb );
+
+/*====================================================================*/
+/*=== Functions for generating x86 code from UCode                 ===*/
+/*====================================================================*/
+
+/* These are only of interest for skins where 
+   VG_(needs).extended_UCode == True. */
+
+/* This is the Intel register encoding. */
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL (0+R_EAX)
+#define R_CL (0+R_ECX)
+#define R_DL (0+R_EDX)
+#define R_BL (0+R_EBX)
+#define R_AH (4+R_EAX)
+#define R_CH (4+R_ECX)
+#define R_DH (4+R_EDX)
+#define R_BH (4+R_EBX)
+
+/* For pretty printing x86 code */
+extern Char* VG_(nameOfIntReg)   ( Int size, Int reg );
+extern Char  VG_(nameOfIntSize)  ( Int size );
+
+/* Randomly useful things */
+extern UInt  VG_(extend_s_8to32) ( UInt x );
+
+/* Code emitters */
+extern void VG_(emitB)  ( UInt b );
+extern void VG_(emitW)  ( UInt w );
+extern void VG_(emitL)  ( UInt l );
+extern void VG_(newEmit)( void );
+
+/* Finding offsets */
+extern Int  VG_(helper_offset)     ( Addr a );
+extern Int  VG_(shadowRegOffset)   ( Int arch );
+extern Int  VG_(shadowFlagsOffset) ( void );
+
+/* Converting reg ranks <-> Intel register ordering, for using register
+   liveness info */
+extern Int VG_(realRegNumToRank) ( Int realReg );
+extern Int VG_(rankToRealRegNum) ( Int rank    );
+
+/* Subroutine calls */
+/* This one just calls it. */
+void VG_(synth_call) ( Bool ensure_shortform, Int word_offset );
+
+/* This one is good for calling C functions -- saves caller save regs,
+   pushes args, calls, clears the stack, restores caller save regs.
+   `fn' must be registered in the baseBlock first.  Acceptable tags are
+   RealReg and Literal.  
+
+   WARNING:  a UInstr should *not* be translated with synth_ccall followed
+   by some other x86 assembly code;  this will confuse
+   vg_ccall_reg_save_analysis() and everything will fall over.
+*/
+void VG_(synth_ccall) ( Addr fn, Int argc, Int regparms_n, UInt argv[],
+                        Tag tagv[], Int ret_reg, 
+                        RRegSet regs_live_before, RRegSet regs_live_after );
+
+/* Addressing modes */
+void VG_(emit_amode_offregmem_reg) ( Int off, Int regmem, Int reg );
+void VG_(emit_amode_ereg_greg)     ( Int e_reg, Int g_reg );
+
+/* v-size (4, or 2 with OSO) insn emitters */
+void VG_(emit_movv_offregmem_reg) ( Int sz, Int off, Int areg, Int reg );
+void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg );
+void VG_(emit_movv_reg_reg)       ( Int sz, Int reg1, Int reg2 );
+void VG_(emit_nonshiftopv_lit_reg)( Int sz, Opcode opc, UInt lit, Int reg );
+void VG_(emit_shiftopv_lit_reg)   ( Int sz, Opcode opc, UInt lit, Int reg );
+void VG_(emit_nonshiftopv_reg_reg)( Int sz, Opcode opc, Int reg1, Int reg2 );
+void VG_(emit_movv_lit_reg)       ( Int sz, UInt lit, Int reg );
+void VG_(emit_unaryopv_reg)       ( Int sz, Opcode opc, Int reg );
+void VG_(emit_pushv_reg)          ( Int sz, Int reg );
+void VG_(emit_popv_reg)           ( Int sz, Int reg );
+
+void VG_(emit_pushl_lit32)        ( UInt int32 );
+void VG_(emit_pushl_lit8)         ( Int lit8 );
+void VG_(emit_cmpl_zero_reg)      ( Int reg );
+void VG_(emit_swapl_reg_EAX)      ( Int reg );
+void VG_(emit_movv_lit_offregmem) ( Int sz, UInt lit, Int off, Int memreg );
+
+/* b-size (1 byte) instruction emitters */
+void VG_(emit_movb_lit_offregmem) ( UInt lit, Int off, Int memreg );
+void VG_(emit_movb_reg_offregmem) ( Int reg, Int off, Int areg );
+void VG_(emit_unaryopb_reg)       ( Opcode opc, Int reg );
+void VG_(emit_testb_lit_reg)      ( UInt lit, Int reg );
+
+/* zero-extended load emitters */
+void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg );
+void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg );
+
+/* misc instruction emitters */
+void VG_(emit_call_reg)         ( Int reg );
+void VG_(emit_add_lit_to_esp)   ( Int lit );
+void VG_(emit_jcondshort_delta) ( Condcode cond, Int delta );
+void VG_(emit_pushal)           ( void );
+void VG_(emit_popal)            ( void );
+void VG_(emit_AMD_prefetch_reg) ( Int reg );
+
+
+/*====================================================================*/
+/*=== Execution contexts                                           ===*/
+/*====================================================================*/
+
+/* Generic resolution type used in a few different ways, such as deciding
+   how closely to compare two errors for equality. */
+typedef 
+   enum { Vg_LowRes, Vg_MedRes, Vg_HighRes } 
+   VgRes;
+
+typedef
+   struct _ExeContext
+   ExeContext;
+
+/* Compare two ExeContexts, just comparing the top two callers. */
+extern Bool VG_(eq_ExeContext) ( VgRes res,
+                                 ExeContext* e1, ExeContext* e2 );
+
+/* Print an ExeContext. */
+extern void VG_(pp_ExeContext) ( ExeContext* );
+
+/* Take a snapshot of the client's stack.  Search our collection of
+   ExeContexts to see if we already have it, and if not, allocate a
+   new one.  Either way, return a pointer to the context. */
+extern ExeContext* VG_(get_ExeContext) ( ThreadState *tst );
+
+
+/*====================================================================*/
+/*=== Error reporting                                              ===*/
+/*====================================================================*/
+
+/* ------------------------------------------------------------------ */
+/* Suppressions describe errors which we want to suppress, ie, not 
+   show the user, usually because it is caused by a problem in a library
+   which we can't fix, replace or work around.  Suppressions are read from 
+   a file at startup time, specified by vg_clo_suppressions, and placed in
+   the vg_suppressions list.  This gives flexibility so that new
+   suppressions can be added to the file as and when needed.
+*/
+
+typedef
+   Int         /* Do not make this unsigned! */
+   SuppKind;
+
+/* An extensible (via the 'extra' field) suppression record.  This holds
+   the suppression details of interest to a skin.  Skins can use a normal
+   enum (with element values in the normal range (0..)) for `skind'. 
+
+   If VG_(needs).skin_errors==True, for each suppression read in by core
+   SK_(recognised_suppression)() and SK_(read_extra_suppression_info) will
+   be called.  The `skind' field is filled in by the value returned in the
+   argument of the first function;  the second function can fill in the
+   `string' and `extra' fields if it wants. 
+*/
+typedef
+   struct {
+      /* What kind of suppression.  Must use the range (0..) */
+      SuppKind skind;
+      /* String -- use is optional.  NULL by default. */
+      Char* string;
+      /* Anything else -- use is optional.  NULL by default. */
+      void* extra;
+   }
+   SkinSupp;
+
+
+/* ------------------------------------------------------------------ */
+/* Error records contain enough info to generate an error report.  The idea
+   is that (typically) the same few points in the program generate thousands
+   of illegal accesses, and we don't want to spew out a fresh error message
+   for each one.  Instead, we use these structures to common up duplicates.
+*/
+
+typedef
+   Int         /* Do not make this unsigned! */
+   ErrorKind;
+
+/* An extensible (via the 'extra' field) error record.  This holds
+   the error details of interest to a skin.  Skins can use a normal
+   enum (with element values in the normal range (0..)) for `ekind'. 
+
+   When errors are found and recorded with VG_(maybe_record_error)(), all
+   the skin must do is pass in the four parameters;  core will
+   allocate/initialise the error record.
+*/
+typedef
+   struct {
+      /* Used by ALL.  Must be in the range (0..) */
+      Int ekind;
+      /* Used frequently */
+      Addr addr;
+      /* Used frequently */
+      Char* string;
+      /* For any skin-specific extras: size and the extra fields */
+      void* extra;
+   }
+   SkinError;
+
+
+/* ------------------------------------------------------------------ */
+/* Call this when an error occurs.  It will be recorded if it's not been
+   seen before.  If it has, the existing error record will have its count
+   incremented.  
+   
+   If the error occurs in generated code, 'tst' should be NULL.  If the
+   error occurs in non-generated code, 'tst' should be non-NULL.  The
+   `extra' field can be stack-allocated;  it will be copied (using
+   SK_(dup_extra_and_update)()) if needed.  But it won't be copied
+   if it's NULL.
+*/
+extern void VG_(maybe_record_error) ( ThreadState* tst, ErrorKind ekind, 
+                                      Addr a, Char* s, void* extra );
+
+/* Gets a non-blank, non-comment line of at most nBuf chars from fd.
+   Skips leading spaces on the line.  Returns True if EOF was hit instead. 
+   Useful for reading in extra skin-specific suppression lines.
+*/
+extern Bool VG_(getLine) ( Int fd, Char* buf, Int nBuf );
+
+
+/*====================================================================*/
+/*=== Obtaining debug information                                  ===*/
+/*====================================================================*/
+
+/* Get the file/function/line number of the instruction at address 'a'. 
+   For these four, if debug info for the address is found, it copies the
+   info into the buffer/UInt and returns True.  If not, it returns False and
+   nothing is copied.  VG_(get_fnname) always demangles C++ function names.
+*/
+extern Bool VG_(get_filename) ( Addr a, Char* filename, Int n_filename );
+extern Bool VG_(get_fnname)   ( Addr a, Char* fnname,   Int n_fnname   );
+extern Bool VG_(get_linenum)  ( Addr a, UInt* linenum );
+
+/* This one is more efficient if getting both filename and line number,
+   because the two lookups are done together. */
+extern Bool VG_(get_filename_linenum) 
+                              ( Addr a, Char* filename, Int n_filename,
+                                        UInt* linenum );
+
+/* Succeeds only if we find from debug info that 'a' is the address of the
+   first instruction in a function -- as opposed to VG_(get_fnname) which
+   succeeds if we find from debug info that 'a' is the address of any
+   instruction in a function.  Use this to instrument the start of
+   a particular function.  Nb: if an executable/shared object is stripped
+   of its symbols, this function will not be able to recognise function
+   entry points within it. */
+extern Bool VG_(get_fnname_if_entry) ( Addr a, Char* filename, Int n_filename );
+
+/* Succeeds if the address is within a shared object or the main executable.
+   It doesn't matter if debug info is present or not. */
+extern Bool VG_(get_objname)  ( Addr a, Char* objname,  Int n_objname  );
+
+
+/*====================================================================*/
+/*=== Shadow chunks and block-finding                              ===*/
+/*====================================================================*/
+
+typedef
+   enum { 
+      Vg_AllocMalloc = 0,
+      Vg_AllocNew    = 1,
+      Vg_AllocNewVec = 2 
+   }
+   VgAllocKind;
+
+/* Description of a malloc'd chunk.  skin_extra[] part can be used by
+   the skin;  size of array is given by VG_(needs).sizeof_shadow_block. */
+typedef 
+   struct _ShadowChunk {
+      struct _ShadowChunk* next;
+      UInt          size : 30;      /* size requested                   */
+      VgAllocKind   allockind : 2;  /* which wrapper did the allocation */
+      Addr          data;           /* ptr to actual block              */
+      UInt          skin_extra[0];  /* extra skin-specific info         */
+   } 
+   ShadowChunk;
+
+/* Use this to free blocks if VG_(needs).alternative_free == True. 
+   It frees the ShadowChunk and the malloc'd block it points to. */
+extern void VG_(freeShadowChunk) ( ShadowChunk* sc );
+
+/* Makes an array of pointers to all the shadow chunks of malloc'd blocks */
+extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows );
+
+/* Determines if address 'a' is within the bounds of the block at start.
+   Allows a little 'slop' round the edges. */
+extern Bool VG_(addr_is_in_block) ( Addr a, Addr start, UInt size );
+
+/* Searches through currently malloc'd blocks until a matching one is found.
+   Returns NULL if none match.  Extra arguments can be implicitly passed to
+   p using nested functions; see vg_memcheck_errcontext.c for an example. */
+extern ShadowChunk* VG_(any_matching_mallocd_ShadowChunks) 
+                        ( Bool (*p) ( ShadowChunk* ));
+
+/* Searches through all thread's stacks to see if any match.  Returns
+ * VG_INVALID_THREADID if none match. */
+extern ThreadId VG_(any_matching_thread_stack)
+                        ( Bool (*p) ( Addr stack_min, Addr stack_max ));
+
+/*====================================================================*/
+/*=== Skin-specific stuff                                          ===*/
+/*====================================================================*/
+
+/* Skin-specific settings.
+ *
+ * If new fields are added to this type, update:
+ *  - vg_main.c:VG_(needs) initialisation
+ *  - vg_main.c:sanity_check_needs()
+ *
+ * If the name of this type or any of its fields change, update:
+ *  - dependent comments (just search for "VG_(needs)"). 
+ */
+typedef
+   struct {
+      /* name and description used in the startup message */
+      Char* name;
+      Char* description;
+
+      /* Booleans that decide core behaviour */
+
+      /* Want to have errors detected by Valgrind's core reported?  Includes:
+         - pthread API errors (many;  eg. unlocking a non-locked mutex)
+         - silly arguments to malloc() et al (eg. negative size)
+         - invalid file descriptors to blocking syscalls read() and write()
+         - bad signal numbers passed to sigaction()
+         - attempt to install signal handler for SIGKILL or SIGSTOP */  
+      Bool core_errors;
+      /* Want to report errors from the skin?  This implies use of
+         suppressions, too. */
+      Bool skin_errors;
+
+      /* Should __libc_freeres() be run?  Bugs in it crash the skin. */
+      Bool run_libc_freeres;
+
+      /* Booleans that indicate extra operations are defined;  if these are
+         True, the corresponding template functions (given below) must be
+         defined.  A lot like being a member of a type class. */
+
+      /* Is information kept about specific individual basic blocks?  (Eg. for
+         cachesim there are cost-centres for every instruction, stored at a
+         basic block level.)  If so, it sometimes has to be discarded, because
+         .so mmap/munmap-ping or self-modifying code (informed by the
+         DISCARD_TRANSLATIONS user request) can cause one instruction address
+         to store information about more than one instruction in one program
+         run!  */
+      Bool basic_block_discards;
+
+      /* Maintains information about each register? */
+      Bool shadow_regs;
+
+      /* Skin defines its own command line options? */
+      Bool command_line_options;
+      /* Skin defines its own client requests? */
+      Bool client_requests;
+
+      /* Skin defines its own UInstrs? */
+      Bool extended_UCode;
+
+      /* Skin does stuff before and/or after system calls? */
+      Bool syscall_wrapper;
+
+      /* Size, in words, of extra info about malloc'd blocks recorded by
+         skin.  Be careful to get this right or you'll get seg faults! */
+      UInt sizeof_shadow_block;
+
+      /* Skin does free()s itself? */
+      Bool alternative_free;
+
+      /* Are skin-state sanity checks performed? */
+      Bool sanity_checks;
+   } 
+   VgNeeds;
+
+extern VgNeeds VG_(needs);
+
+
+/* ------------------------------------------------------------------ */
+/* Core events to track */
+
+/* Part of the core from which this call was made.  Useful for determining
+ * what kind of error message should be emitted. */
+typedef 
+   enum { Vg_CorePThread, Vg_CoreSignal, Vg_CoreSysCall, Vg_CoreTranslate }
+   CorePart;
+
+/* Events happening in core to track.  To be notified, assign a function
+ * to the function pointer.  To ignore an event, don't do anything
+ * (default assignment is to NULL in which case the call is skipped). */
+typedef
+   struct {
+      /* Memory events */
+      void (*new_mem_startup)( Addr a, UInt len, Bool rr, Bool ww, Bool xx );
+      void (*new_mem_heap)   ( Addr a, UInt len, Bool is_inited );
+      void (*new_mem_stack)  ( Addr a, UInt len );
+      void (*new_mem_stack_aligned) ( Addr a, UInt len );
+      void (*new_mem_stack_signal)  ( Addr a, UInt len );
+      void (*new_mem_brk)    ( Addr a, UInt len );
+      void (*new_mem_mmap)   ( Addr a, UInt len, 
+                               Bool nn, Bool rr, Bool ww, Bool xx );
+
+      void (*copy_mem_heap)  ( Addr from, Addr to, UInt len );
+      void (*copy_mem_remap) ( Addr from, Addr to, UInt len );
+      void (*change_mem_mprotect) ( Addr a, UInt len,  
+                                    Bool nn, Bool rr, Bool ww, Bool xx );
+      
+      void (*ban_mem_heap)   ( Addr a, UInt len );
+      void (*ban_mem_stack)  ( Addr a, UInt len );
+
+      void (*die_mem_heap)   ( Addr a, UInt len );
+      void (*die_mem_stack)  ( Addr a, UInt len );
+      void (*die_mem_stack_aligned) ( Addr a, UInt len );
+      void (*die_mem_stack_signal)  ( Addr a, UInt len );
+      void (*die_mem_brk)    ( Addr a, UInt len );
+      void (*die_mem_munmap) ( Addr a, UInt len );
+
+      void (*bad_free)        ( ThreadState* tst, Addr a );
+      void (*mismatched_free) ( ThreadState* tst, Addr a );
+
+      void (*pre_mem_read)   ( CorePart part, ThreadState* tst,
+                               Char* s, Addr a, UInt size );
+      void (*pre_mem_read_asciiz) ( CorePart part, ThreadState* tst,
+                                    Char* s, Addr a );
+      void (*pre_mem_write)  ( CorePart part, ThreadState* tst,
+                               Char* s, Addr a, UInt size );
+      /* Not implemented yet -- have to add in lots of places, which is a
+         pain.  Won't bother unless/until there's a need. */
+      /* void (*post_mem_read)  ( ThreadState* tst, Char* s, 
+                                  Addr a, UInt size ); */
+      void (*post_mem_write) ( Addr a, UInt size );
+
+
+      /* Scheduler events */
+      void (*thread_run) ( ThreadId tid );
+
+
+      /* Mutex events */
+      void (*post_mutex_lock)   ( ThreadId tid, 
+                                  void* /*pthread_mutex_t* */ mutex );
+      void (*post_mutex_unlock) ( ThreadId tid, 
+                                  void* /*pthread_mutex_t* */ mutex );
+      
+      /* Others... threads, condition variables, etc... */
+
+      /* ... */
+   }
+   VgTrackEvents;
+
+/* Declare the struct instance */
+extern VgTrackEvents VG_(track_events);
+
+
+/* ------------------------------------------------------------------ */
+/* Template functions */
+
+/* These are the parameterised functions in the core.  The default definitions
+ * are replaced by LD_PRELOADing skin substitutes.  At the very least, a skin
+ * must define the fundamental template functions.  Depending on what needs
+ * boolean variables are set, extra templates will be used too.  For each
+ * group, the need governing its use is mentioned. */
+
+
+/* ------------------------------------------------------------------ */
+/* Fundamental template functions */
+
+/* Initialise skin.   Must do the following:
+     - initialise the 'needs' struct
+     - register any helpers called by generated code
+  
+   May do the following:
+     - indicate events to track by initialising part or all of the 'track'
+       struct
+     - register any skin-specific profiling events
+     - any other skin-specific initialisation
+*/
+extern void        SK_(pre_clo_init) ( VgNeeds* needs, VgTrackEvents* track );
+
+/* Do any initialisation that relies on the results of command line option
+   processing. */
+extern void        SK_(post_clo_init)( void );
+
+/* Instrument a basic block.  Must be a true function, ie. the same input
+   always results in the same output, because basic blocks can be
+   retranslated.  Unless you're doing something really strange...
+   'orig_addr' is the address of the first instruction in the block. */
+extern UCodeBlock* SK_(instrument)   ( UCodeBlock* cb, Addr orig_addr );
+
+/* Finish up, print out any results, etc. */
+extern void        SK_(fini)         ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).skin_errors */
+
+/* Identify if two errors are equal, or equal enough.  `res' indicates how
+   close is "close enough".  `res' should be passed on as necessary, eg. if
+   the SkinError's extra field contains an ExeContext, `res' should be
+   passed to VG_(eq_ExeContext)() if the ExeContexts are considered.  Other
+   than that, probably don't worry about it unless you have lots of very
+   similar errors occurring.
+ */
+extern Bool SK_(eq_SkinError) ( VgRes res,
+                                SkinError* e1, SkinError* e2 );
+
+/* Print error context.  The passed function pp_ExeContext() can be (and
+   probably should be) used to print the location of the error. */
+extern void SK_(pp_SkinError) ( SkinError* ec, void (*pp_ExeContext)(void) );
+
+/* Copy the ec->extra part and replace ec->extra with the new copy.  This is
+   necessary to move from a temporary stack copy to a permanent heap one.
+  
+   Then fill in any details that could be postponed until after the decision
+   whether to ignore the error (ie. details not affecting the result of
+   SK_(eq_SkinError)()).  This saves time when errors are ignored.
+  
+   Yuk.
+*/
+extern void SK_(dup_extra_and_update)(SkinError* ec);
+
+/* Return value indicates recognition.  If recognised, type goes in `skind'. */
+extern Bool SK_(recognised_suppression) ( Char* name, SuppKind *skind );
+
+/* Read any extra info for this suppression kind.  For filling up the
+   `string' and `extra' fields in a `SkinSupp' struct if necessary. */
+extern Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, 
+                                                Int nBuf, SkinSupp *s );
+
+/* This should just check the kinds match and maybe some stuff in the
+   'extra' field if appropriate */
+extern Bool SK_(error_matches_suppression)(SkinError* ec, SkinSupp* su);
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).basic_block_discards */
+
+extern void SK_(discard_basic_block_info) ( Addr a, UInt size );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).shadow_regs */
+
+/* Valid values for general registers and EFLAGS register, for initialising
+   and updating registers when written in certain places in core. */
+extern void SK_(written_shadow_regs_values) ( UInt* gen_reg, UInt* eflags );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).command_line_options */
+
+/* Return True if option was recognised */
+extern Bool SK_(process_cmd_line_option)( Char* argv );
+
+/* Print out command line usage for skin options */
+extern Char* SK_(usage)                  ( void );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).client_requests */
+
+extern UInt SK_(handle_client_request) ( ThreadState* tst, UInt* arg_block );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).extended_UCode */
+
+/* Used in VG_(getExtRegUsage)() */
+#  define VG_UINSTR_READS_REG(ono)              \
+   { if (mycat(u->tag,ono) == tag)              \
+        { arr[n].num     = mycat(u->val,ono);   \
+          arr[n].isWrite = False;               \
+          n++;                                  \
+        }                                       \
+   }
+#  define VG_UINSTR_WRITES_REG(ono)             \
+   {  if (mycat(u->tag,ono) == tag)             \
+         { arr[n].num     = mycat(u->val,ono);  \
+           arr[n].isWrite = True;               \
+           n++;                                 \
+         }                                      \
+   }
+
+// SSS: only ones using camel caps
+extern Int   SK_(getExtRegUsage) ( UInstr* u, Tag tag, RegUse* arr );
+extern void  SK_(emitExtUInstr)  ( UInstr* u, RRegSet regs_live_before );
+extern Bool  SK_(saneExtUInstr)  ( Bool beforeRA, Bool beforeLiveness,
+                                   UInstr* u );
+extern Char* SK_(nameExtUOpcode) ( Opcode opc );
+extern void  SK_(ppExtUInstr)    ( UInstr* u );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).syscall_wrapper */
+
+/* If either of the pre_ functions malloc() something to return, the
+ * corresponding post_ function had better free() it! 
+ */ 
+extern void* SK_( pre_syscall) ( ThreadId tid, UInt syscallno,
+                                 Bool is_blocking );
+extern void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
+                                 void* pre_result, Int res,
+                                 Bool is_blocking );
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).sizeof_shadow_block > 0 */
+
+extern void SK_(complete_shadow_chunk) ( ShadowChunk* sc, ThreadState* tst );
+
+
+/* ------------------------------------------------------------------ */
+/* VG_(needs).alternative_free */
+
+extern void SK_(alt_free) ( ShadowChunk* sc, ThreadState* tst );
+
+/* ---------------------------------------------------------------------
+   VG_(needs).sanity_checks */
+
+extern Bool SK_(cheap_sanity_check)     ( void );
+extern Bool SK_(expensive_sanity_check) ( void );
+
+
+#endif   /* NDEF __VG_SKIN_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                vg_skin.h ---*/
+/*--------------------------------------------------------------------*/
+
diff --git a/vg_startup.S b/vg_startup.S
index 63ee590..d6c202e 100644
--- a/vg_startup.S
+++ b/vg_startup.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/vg_symtab2.c b/vg_symtab2.c
index 8330794..728f228 100644
--- a/vg_symtab2.c
+++ b/vg_symtab2.c
@@ -25,7 +25,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -37,17 +37,12 @@
 /* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from
    dlopen()ed libraries, which is something that KDE3 does a lot.
 
-   Stabs reader greatly improved by Nick Nethercode, Apr 02.
-
-   16 May 02: when notified about munmap, return a Bool indicating
-   whether or not the area being munmapped had executable permissions.
-   This is then used to determine whether or not
-   VG_(invalid_translations) should be called for that area.  In order
-   that this work even if --instrument=no, in this case we still keep
-   track of the mapped executable segments, but do not load any debug
-   info or symbols.
+   Stabs reader greatly improved by Nick Nethercote, Apr 02.
 */
 
+/* Set to True when first debug info search is performed */
+Bool VG_(using_debug_info) = False;
+
 /*------------------------------------------------------------*/
 /*--- Structs n stuff                                      ---*/
 /*------------------------------------------------------------*/
@@ -126,23 +121,14 @@
    SegInfo;
 
 
-/* -- debug helper -- */
-static void ppSegInfo ( SegInfo* si )
-{
-   VG_(printf)("name: %s\n"
-               "start %p, size %d, foffset %d\n",
-               si->filename?si->filename : (UChar*)"NULL",
-               si->start, si->size, si->foffset );
-}
-
 static void freeSegInfo ( SegInfo* si )
 {
    vg_assert(si != NULL);
-   if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename);
-   if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab);
-   if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab);
-   if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab);
-   VG_(free)(VG_AR_SYMTAB, si);
+   if (si->filename) VG_(arena_free)(VG_AR_SYMTAB, si->filename);
+   if (si->symtab)   VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
+   if (si->loctab)   VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
+   if (si->strtab)   VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
+   VG_(arena_free)(VG_AR_SYMTAB, si);
 }
 
 
@@ -151,23 +137,54 @@
 /*------------------------------------------------------------*/
 
 /* Add a str to the string table, including terminating zero, and
-   return offset of the string in vg_strtab. */
+   return offset of the string in vg_strtab.  Unless it's been seen
+   recently, in which case we find the old index and return that.
+   This avoids the most egregious duplications. */
 
 static __inline__
 Int addStr ( SegInfo* si, Char* str )
 {
+#  define EMPTY    0xffffffff
+#  define NN       5
+   
+   /* prevN[0] has the most recent, prevN[NN-1] the least recent */
+   static UInt     prevN[] = { EMPTY, EMPTY, EMPTY, EMPTY, EMPTY };
+   static SegInfo* curr_si = NULL;
+
    Char* new_tab;
    Int   new_sz, i, space_needed;
-   
+
+   /* Avoid gratuitous duplication:  if we saw `str' within the last NN,
+    * within this segment, return that index.  Saves about 200KB in glibc,
+    * extra time taken is too small to measure.  --NJN 2002-Aug-30 */
+   if (curr_si == si) {
+      for (i = NN-1; i >= 0; i--) {
+         if (EMPTY != prevN[i] &&
+             (0 == VG_(strcmp)(str, &si->strtab[prevN[i]]))) {
+            return prevN[i];
+         }
+      }
+   } else {
+      /* New segment */
+      curr_si = si;
+      for (i = 0; i < 5; i++) prevN[i] = EMPTY;
+   }
+   /* Shuffle previous ones along, put new one in. */
+   for (i = NN-1; i > 0; i--) prevN[i] = prevN[i-1];
+   prevN[0] = si->strtab_used;
+
+#  undef EMPTY
+
    space_needed = 1 + VG_(strlen)(str);
+
    if (si->strtab_used + space_needed > si->strtab_size) {
       new_sz = 2 * si->strtab_size;
       if (new_sz == 0) new_sz = 5000;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz);
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz);
       if (si->strtab != NULL) {
          for (i = 0; i < si->strtab_used; i++)
             new_tab[i] = si->strtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->strtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->strtab);
       }
       si->strtab      = new_tab;
       si->strtab_size = new_sz;
@@ -178,6 +195,7 @@
 
    si->strtab_used += space_needed;
    vg_assert(si->strtab_used <= si->strtab_size);
+
    return si->strtab_used - space_needed;
 }
 
@@ -195,11 +213,11 @@
    if (si->symtab_used == si->symtab_size) {
       new_sz = 2 * si->symtab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) );
       if (si->symtab != NULL) {
          for (i = 0; i < si->symtab_used; i++)
             new_tab[i] = si->symtab[i];
-         VG_(free)(VG_AR_SYMTAB, si->symtab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->symtab);
       }
       si->symtab = new_tab;
       si->symtab_size = new_sz;
@@ -224,11 +242,11 @@
    if (si->loctab_used == si->loctab_size) {
       new_sz = 2 * si->loctab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
+      new_tab = VG_(arena_malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) );
       if (si->loctab != NULL) {
          for (i = 0; i < si->loctab_used; i++)
             new_tab[i] = si->loctab[i];
-         VG_(free)(VG_AR_SYMTAB, si->loctab);
+         VG_(arena_free)(VG_AR_SYMTAB, si->loctab);
       }
       si->loctab = new_tab;
       si->loctab_size = new_sz;
@@ -732,8 +750,7 @@
                      next_addr = (UInt)stab[i+1].n_value;
                      break;
 
-                  /* Boring one: skip, look for something more
-                     useful. */
+                  /* Boring one: skip, look for something more useful. */
                   case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC: 
                   case N_STSYM: case N_LCSYM: case N_GSYM:
                      i++;
@@ -1006,10 +1023,10 @@
       ++ state_machine_regs.last_file_entry;
       name = data;
       if (*fnames == NULL)
-        *fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+        *fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
       else
-        *fnames = VG_(realloc)(
-                     VG_AR_SYMTAB, *fnames, 
+        *fnames = VG_(arena_realloc)(
+                     VG_AR_SYMTAB, *fnames, /*alignment*/4,
                      sizeof(UInt) 
                         * (state_machine_regs.last_file_entry + 1));
       (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name);
@@ -1136,9 +1153,9 @@
 		semantics, we need to malloc the first time. */
 
              if (fnames == NULL)
-               fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
+               fnames = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2);
              else
-               fnames = VG_(realloc)(VG_AR_SYMTAB, fnames, 
+               fnames = VG_(arena_realloc)(VG_AR_SYMTAB, fnames, /*alignment*/4,
                            sizeof(UInt) 
                               * (state_machine_regs.last_file_entry + 1));
              data += VG_(strlen) ((Char *) data) + 1;
@@ -1281,7 +1298,7 @@
              break;
            }
        }
-      VG_(free)(VG_AR_SYMTAB, fnames);
+      VG_(arena_free)(VG_AR_SYMTAB, fnames);
       fnames = NULL;
     }
 }
@@ -1327,7 +1344,7 @@
    }
    n_oimage = stat_buf.st_size;
 
-   fd = VG_(open_read)(si->filename);
+   fd = VG_(open)(si->filename, VKI_O_RDONLY, 0);
    if (fd == -1) {
       vg_symerr("Can't open .so/.exe to read symbols?!");
       return;
@@ -1650,8 +1667,7 @@
 static SegInfo* segInfo = NULL;
 
 
-static
-void read_symtab_callback ( 
+void VG_(read_symtab_callback) ( 
         Addr start, UInt size, 
         Char rr, Char ww, Char xx, 
         UInt foffset, UChar* filename )
@@ -1686,14 +1702,14 @@
    }
 
    /* Get the record initialised right. */
-   si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
+   si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
    si->next = segInfo;
    segInfo = si;
 
    si->start    = start;
    si->size     = size;
    si->foffset  = foffset;
-   si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
+   si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
    VG_(strcpy)(si->filename, filename);
 
    si->symtab = NULL;
@@ -1704,15 +1720,12 @@
    si->strtab_size = si->strtab_used = 0;
 
    /* Kludge ... */
-   si->offset 
-      = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
+   si->offset = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start;
 
    /* And actually fill it up. */
-   if (VG_(clo_instrument) || VG_(clo_cachesim)) {
-      vg_read_lib_symbols ( si );
-      canonicaliseSymtab ( si );
-      canonicaliseLoctab ( si );
-   }
+   vg_read_lib_symbols ( si );
+   canonicaliseSymtab ( si );
+   canonicaliseLoctab ( si );
 }
 
 
@@ -1724,57 +1737,29 @@
    libraries as they are dlopen'd.  Conversely, when the client does
    munmap(), vg_symtab_notify_munmap() throws away any symbol tables
    which happen to correspond to the munmap()d area.  */
-void VG_(read_symbols) ( void )
+void VG_(maybe_read_symbols) ( void )
 {
-   VG_(read_procselfmaps) ( read_symtab_callback );
+   if (!VG_(using_debug_info))
+      return;
 
-   /* Do a sanity check on the symbol tables: ensure that the address
-      space pieces they cover do not overlap (otherwise we are severely
-      hosed).  This is a quadratic algorithm, but there shouldn't be
-      many of them.  
-   */
-   { SegInfo *si, *si2;
-     for (si = segInfo; si != NULL; si = si->next) {
-        /* Check no overlap between *si and those in the rest of the
-           list. */
-        for (si2 = si->next; si2 != NULL; si2 = si2->next) {
-           Addr lo = si->start;
-           Addr hi = si->start + si->size - 1;
-           Addr lo2 = si2->start;
-           Addr hi2 = si2->start + si2->size - 1;
-           Bool overlap;
-           vg_assert(lo < hi);
-	   vg_assert(lo2 < hi2);
-           /* the main assertion */
-           overlap = (lo <= lo2 && lo2 <= hi)
-                      || (lo <= hi2 && hi2 <= hi);
-	   if (overlap) {
-              VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" );
-              ppSegInfo ( si );
-              ppSegInfo ( si2 );
-              VG_(printf)("\n\n"); 
-              vg_assert(! overlap);
-	   }
-        }
-     }
-   }    
+   VGP_PUSHCC(VgpReadSyms);
+      VG_(read_procselfmaps) ( VG_(read_symtab_callback) );
+   VGP_POPCC(VgpReadSyms);
 }
 
-
 /* When an munmap() call happens, check to see whether it corresponds
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
    accuracy of error messages, but we need to do it in order to
    maintain the no-overlapping invariant.
-
-   16 May 02: Returns a Bool indicating whether or not the discarded
-   range falls inside a known executable segment.  See comment at top
-   of file for why.
 */
-Bool VG_(symtab_notify_munmap) ( Addr start, UInt length )
+void VG_(maybe_unload_symbols) ( Addr start, UInt length )
 {
    SegInfo *prev, *curr;
 
+   if (!VG_(using_debug_info))
+      return;
+
    prev = NULL;
    curr = segInfo;
    while (True) {
@@ -1784,7 +1769,7 @@
       curr = curr->next;
    }
    if (curr == NULL) 
-      return False;
+      return;
 
    VG_(message)(Vg_UserMsg, 
                 "discard syms in %s due to munmap()", 
@@ -1799,7 +1784,7 @@
    }
 
    freeSegInfo(curr);
-   return True;
+   return;
 }
 
 
@@ -1808,13 +1793,22 @@
 /*--- plausible-looking stack dumps.                       ---*/
 /*------------------------------------------------------------*/
 
+static __inline__ void ensure_debug_info_inited ( void )
+{
+   if (!VG_(using_debug_info)) {
+      VG_(using_debug_info) = True;
+      VG_(maybe_read_symbols)();
+   }
+}
+
 /* Find a symbol-table index containing the specified pointer, or -1
    if not found.  Binary search.  */
 
-static Int search_one_symtab ( SegInfo* si, Addr ptr )
+static Int search_one_symtab ( SegInfo* si, Addr ptr,
+                               Bool match_anywhere_in_fun )
 {
    Addr a_mid_lo, a_mid_hi;
-   Int  mid, 
+   Int  mid, size, 
         lo = 0, 
         hi = si->symtab_used-1;
    while (True) {
@@ -1822,7 +1816,10 @@
       if (lo > hi) return -1; /* not found */
       mid      = (lo + hi) / 2;
       a_mid_lo = si->symtab[mid].addr;
-      a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1;
+      size = ( match_anywhere_in_fun
+             ? si->symtab[mid].size
+             : 1);
+      a_mid_hi = ((Addr)si->symtab[mid].addr) + size - 1;
 
       if (ptr < a_mid_lo) { hi = mid-1; continue; } 
       if (ptr > a_mid_hi) { lo = mid+1; continue; }
@@ -1836,21 +1833,29 @@
    *psi to the relevant SegInfo, and *symno to the symtab entry number
    within that.  If not found, *psi is set to NULL.  */
 
-static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno )
+static void search_all_symtabs ( Addr ptr, /*OUT*/SegInfo** psi, 
+                                           /*OUT*/Int* symno,
+                                 Bool match_anywhere_in_fun )
 {
    Int      sno;
    SegInfo* si;
+
+   ensure_debug_info_inited();
+   VGP_PUSHCC(VgpSearchSyms);
+   
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
-         sno = search_one_symtab ( si, ptr );
+         sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
          if (sno == -1) goto not_found;
          *symno = sno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
@@ -1882,54 +1887,84 @@
    *psi to the relevant SegInfo, and *locno to the loctab entry number
    within that.  If not found, *psi is set to NULL.
 */
-static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno )
+static void search_all_loctabs ( Addr ptr, /*OUT*/SegInfo** psi,
+                                           /*OUT*/Int* locno )
 {
    Int      lno;
    SegInfo* si;
+
+   VGP_PUSHCC(VgpSearchSyms);
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= ptr && ptr < si->start+si->size) {
          lno = search_one_loctab ( si, ptr );
          if (lno == -1) goto not_found;
          *locno = lno;
          *psi = si;
+         VGP_POPCC(VgpSearchSyms);
          return;
       }
    }
   not_found:
    *psi = NULL;
+   VGP_POPCC(VgpSearchSyms);
 }
 
 
 /* The whole point of this whole big deal: map a code address to a
    plausible symbol name.  Returns False if no idea; otherwise True.
-   Caller supplies buf and nbuf.  If no_demangle is True, don't do
+   Caller supplies buf and nbuf.  If demangle is False, don't do
    demangling, regardless of vg_clo_demangle -- probably because the
    call has come from vg_what_fn_or_object_is_this. */
-Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a, 
-                            Char* buf, Int nbuf )
+static
+Bool get_fnname ( Bool demangle, Addr a, Char* buf, Int nbuf,
+                  Bool match_anywhere_in_fun )
 {
    SegInfo* si;
    Int      sno;
-   search_all_symtabs ( a, &si, &sno );
+   search_all_symtabs ( a, &si, &sno, match_anywhere_in_fun );
    if (si == NULL) 
       return False;
-   if (no_demangle) {
+   if (demangle) {
+      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
+   } else {
       VG_(strncpy_safely) 
          ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf );
-   } else {
-      VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf );
    }
    return True;
 }
 
+/* This is available to skins... always demangle C++ names */
+Bool VG_(get_fnname) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
 
-/* Map a code address to the name of a shared object file.  Returns
-   False if no idea; otherwise False.  Caller supplies buf and
-   nbuf. */
-static
-Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf )
+/* This is available to skins... always demangle C++ names,
+   only succeed if 'a' matches first instruction of function. */
+Bool VG_(get_fnname_if_entry) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/True, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/False );
+}
+
+/* This is only available to core... don't demangle C++ names */
+Bool VG_(get_fnname_nodemangle) ( Addr a, Char* buf, Int nbuf )
+{
+   return get_fnname ( /*demangle*/False, a, buf, nbuf,
+                       /*match_anywhere_in_fun*/True );
+}
+
+/* Map a code address to the name of a shared object file or the executable.
+   Returns False if no idea; otherwise True.  Doesn't require debug info.
+   Caller supplies buf and nbuf. */
+Bool VG_(get_objname) ( Addr a, Char* buf, Int nbuf )
 {
    SegInfo* si;
+
+   ensure_debug_info_inited();
    for (si = segInfo; si != NULL; si = si->next) {
       if (si->start <= a && a < si->start+si->size) {
          VG_(strncpy_safely)(buf, si->filename, nbuf);
@@ -1939,27 +1974,39 @@
    return False;
 }
 
-/* Return the name of an erring fn in a way which is useful
-   for comparing against the contents of a suppressions file. 
-   Always writes something to buf.  Also, doesn't demangle the
-   name, because we want to refer to mangled names in the 
-   suppressions file.
-*/
-void VG_(what_obj_and_fun_is_this) ( Addr a,
-                                     Char* obj_buf, Int n_obj_buf,
-                                     Char* fun_buf, Int n_fun_buf )
+
+/* Map a code address to a filename.  Returns True if successful.  */
+Bool VG_(get_filename)( Addr a, Char* filename, Int n_filename )
 {
-   (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf );
-   (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf );
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], 
+                       n_filename);
+   return True;
 }
 
+/* Map a code address to a line number.  Returns True if successful. */
+Bool VG_(get_linenum)( Addr a, UInt* lineno )
+{
+   SegInfo* si;
+   Int      locno;
+   search_all_loctabs ( a, &si, &locno );
+   if (si == NULL) 
+      return False;
+   *lineno = si->loctab[locno].lineno;
+
+   return True;
+}
 
 /* Map a code address to a (filename, line number) pair.  
    Returns True if successful.
 */
-Bool VG_(what_line_is_this)( Addr a, 
-                             UChar* filename, Int n_filename, 
-                             UInt* lineno )
+Bool VG_(get_filename_linenum)( Addr a, 
+                                Char* filename, Int n_filename, 
+                                UInt* lineno )
 {
    SegInfo* si;
    Int      locno;
@@ -2001,11 +2048,13 @@
 
    n = 0;
 
-   know_fnname  = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT);
-   know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT);
-   know_srcloc  = VG_(what_line_is_this)(ec->eips[0], 
-                                         buf_srcloc, M_VG_ERRTXT, 
-                                         &lineno);
+   // SSS: factor this repeated code out!
+
+   know_fnname  = VG_(get_fnname) (ec->eips[0], buf_fn,  M_VG_ERRTXT);
+   know_objname = VG_(get_objname)(ec->eips[0], buf_obj, M_VG_ERRTXT);
+   know_srcloc  = VG_(get_filename_linenum)(ec->eips[0], 
+                                            buf_srcloc, M_VG_ERRTXT, 
+                                            &lineno);
 
    APPEND("   at ");
    VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]);
@@ -2035,11 +2084,11 @@
    VG_(message)(Vg_UserMsg, "%s", buf);
 
    for (i = 1; i < stop_at && ec->eips[i] != 0; i++) {
-      know_fnname  = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT);
-      know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT);
-      know_srcloc  = VG_(what_line_is_this)(ec->eips[i], 
-                                          buf_srcloc, M_VG_ERRTXT, 
-                                          &lineno);
+      know_fnname  = VG_(get_fnname) (ec->eips[i], buf_fn,  M_VG_ERRTXT);
+      know_objname = VG_(get_objname)(ec->eips[i], buf_obj, M_VG_ERRTXT);
+      know_srcloc  = VG_(get_filename_linenum)(ec->eips[i], 
+                                               buf_srcloc, M_VG_ERRTXT, 
+                                               &lineno);
       n = 0;
       APPEND("   by ");
       VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]);
diff --git a/vg_syscall.S b/vg_syscall.S
index adabbed..52d6091 100644
--- a/vg_syscall.S
+++ b/vg_syscall.S
@@ -26,7 +26,7 @@
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
 
-  The GNU General Public License is contained in the file LICENSE.
+  The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_constants.h"
diff --git a/vg_syscall_mem.c b/vg_syscall_mem.c
deleted file mode 100644
index 580f6af..0000000
--- a/vg_syscall_mem.c
+++ /dev/null
@@ -1,3302 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- Update the byte permission maps following a system call.     ---*/
-/*---                                             vg_syscall_mem.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Valgrind, an x86 protected-mode emulator 
-   designed for debugging and profiling binaries on x86-Unixes.
-
-   Copyright (C) 2000-2002 Julian Seward 
-      jseward@acm.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file LICENSE.
-*/
-
-#include "vg_include.h"
-
-/* vg_unsafe.h should NOT be included into any file except this
-   one. */
-#include "vg_unsafe.h"
-
-
-/* All system calls are channelled through vg_wrap_syscall.  It does
-   three things:
-
-   * optionally, checks the permissions for the args to the call
-
-   * perform the syscall, usually by passing it along to the kernel
-     unmodified.  However, because we simulate signals ourselves,
-     signal-related syscalls are routed to vg_signal.c, and are not
-     delivered to the kernel.
-
-   * Update the permission maps following the syscall.
-
-   A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S
-   does the tricky bit of passing a syscall to the kernel, whilst
-   having the simulator retain control.
-*/
-
-static void make_noaccess ( Addr a, UInt len )
-{
-   if (VG_(clo_instrument))
-      VGM_(make_noaccess) ( a, len );
-}
-
-static void make_writable ( Addr a, UInt len )
-{
-   if (VG_(clo_instrument))
-      VGM_(make_writable) ( a, len );
-}
-
-static void make_readable ( Addr a, UInt len )
-{
-   if (VG_(clo_instrument))
-      VGM_(make_readable) ( a, len );
-}
-
-static void make_readwritable ( Addr a, UInt len )
-{
-   if (VG_(clo_instrument))
-      VGM_(make_readwritable) ( a, len );
-}
-
-static
-void must_be_writable ( ThreadState* tst, 
-                        Char* syscall_name, UInt base, UInt size )
-{
-   Bool ok;
-   Addr bad_addr;
-   /* VG_(message)(Vg_DebugMsg,"must be writable: %x .. %x",
-                               base,base+size-1); */
-   if (!VG_(clo_instrument)) 
-      return;
-   ok = VGM_(check_writable) ( base, size, &bad_addr );
-   if (!ok)
-      VG_(record_param_err) ( tst, bad_addr, True, syscall_name );
-}
-
-static
-void must_be_readable ( ThreadState* tst, 
-                        Char* syscall_name, UInt base, UInt size )
-{
-   Bool ok;
-   Addr bad_addr;
-   /* VG_(message)(Vg_DebugMsg,"must be readable: %x .. %x",
-                               base,base+size-1); */
-   if (!VG_(clo_instrument)) 
-      return;
-   ok = VGM_(check_readable) ( base, size, &bad_addr );
-   if (!ok)
-      VG_(record_param_err) ( tst, bad_addr, False, syscall_name );
-}
-
-static
-void must_be_readable_asciiz ( ThreadState* tst, 
-                               Char* syscall_name, UInt str )
-{
-   Bool ok = True;
-   Addr bad_addr;
-   /* VG_(message)(Vg_DebugMsg,"must be readable asciiz: 0x%x",str); */
-   if (!VG_(clo_instrument)) 
-      return;
-   ok = VGM_(check_readable_asciiz) ( (Addr)str, &bad_addr );
-   if (!ok)
-      VG_(record_param_err) ( tst, bad_addr, False, syscall_name );
-}
-
-
-/* Set memory permissions, based on PROT_* values for mmap/mprotect,
-   into the permissions our scheme understands.  Dunno if this is
-   really correct.  */
-
-static void approximate_mmap_permissions ( Addr a, UInt len, UInt prot )
-{
-   /* PROT_READ and PROT_WRITE --> readable
-      PROT_READ only           --> readable
-      PROT_WRITE only          --> writable
-      NEITHER                  --> noaccess
-   */
-   if (prot & PROT_READ)
-      make_readable(a,len);
-   else
-   if (prot & PROT_WRITE)
-      make_writable(a,len);
-   else
-      make_noaccess(a,len);
-}
-
-
-/* Dereference a pointer, but only after checking that it's
-   safe to do so.  If not, return the default.
-*/
-static
-UInt safe_dereference ( Addr aa, UInt defawlt )
-{
-   if (!VG_(clo_instrument)) 
-      return * (UInt*)aa;
-   if (VGM_(check_readable)(aa,4,NULL))
-      return * (UInt*)aa;
-   else
-      return defawlt;
-}
-
-
-/* Is this a Linux kernel error return value? */
-/* From:
-   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
-   linux/i386/sysdep.h?
-   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
-
-   QUOTE:
-
-   Linux uses a negative return value to indicate syscall errors,
-   unlike most Unices, which use the condition codes' carry flag.
-
-   Since version 2.1 the return value of a system call might be
-   negative even if the call succeeded.  E.g., the `lseek' system call
-   might return a large offset.  Therefore we must not anymore test
-   for < 0, but test for a real error by making sure the value in %eax
-   is a real error number.  Linus said he will make sure the no syscall
-   returns a value in -1 .. -4095 as a valid result so we can savely
-   test with -4095.  
-
-   END QUOTE
-*/
-Bool VG_(is_kerror) ( Int res )
-{
-   if (res >= -4095 && res <= -1)
-      return True;
-   else
-      return False;
-}
-
-static
-UInt get_shm_size ( Int shmid )
-{
-   struct shmid_ds buf;
-   long __res;
-    __asm__ volatile ( "int $0x80"
-                       : "=a" (__res)
-                       : "0" (__NR_ipc),
-                         "b" ((long)(24) /*IPCOP_shmctl*/),
-                         "c" ((long)(shmid)),
-                         "d" ((long)(IPC_STAT)),
-                         "S" ((long)(0)),
-                         "D" ((long)(&buf)) );
-    if ( VG_(is_kerror) ( __res ) )
-       return 0;
- 
-   return buf.shm_segsz;
-}
- 
-static
-Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
-{
-   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
-   Char *result = VG_(malloc) ( aid, len );
-   VG_(strcpy) ( result, s1 );
-   VG_(strcat) ( result, s2 );
-   return result;
-}
-
-static 
-void must_be_readable_sendmsg ( ThreadState* tst, 
-                                Char *msg, UInt base, UInt size )
-{
-   Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT );
-   must_be_readable ( tst, outmsg, base, size );
-   VG_(free) ( VG_AR_TRANSIENT, outmsg );
-}
-
-static 
-void must_be_writable_recvmsg ( ThreadState* tst, 
-                                Char *msg, UInt base, UInt size )
-{
-   Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT );
-   must_be_writable ( tst, outmsg, base, size );
-   VG_(free) ( VG_AR_TRANSIENT, outmsg );
-}
-
-static
-void make_readable_recvmsg ( ThreadState* tst,
-                             Char *fieldName, UInt base, UInt size )
-{
-   make_readable( base, size );
-}
- 
-static
-void msghdr_foreachfield ( 
-        ThreadState* tst, 
-        struct msghdr *msg, 
-        void (*foreach_func)( ThreadState*, Char *, UInt, UInt ) 
-     )
-{
-   if ( !msg )
-      return;
-
-   foreach_func ( tst, "(msg)", (Addr)msg, sizeof( struct msghdr ) );
-
-   if ( msg->msg_name )
-      foreach_func ( tst, 
-                     "(msg.msg_name)", 
-                     (Addr)msg->msg_name, msg->msg_namelen );
-
-   if ( msg->msg_iov ) {
-      struct iovec *iov = msg->msg_iov;
-      UInt i;
-
-      foreach_func ( tst, 
-                     "(msg.msg_iov)", 
-                     (Addr)iov, msg->msg_iovlen * sizeof( struct iovec ) );
-
-      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
-         foreach_func ( tst, 
-                        "(msg.msg_iov[i]", 
-                        (Addr)iov->iov_base, iov->iov_len );
-   }
-
-   if ( msg->msg_control )
-      foreach_func ( tst, 
-                     "(msg.msg_control)", 
-                     (Addr)msg->msg_control, msg->msg_controllen );
-}
-
-static
-void must_be_readable_sockaddr ( ThreadState* tst,
-                                 Char *description,
-                                 struct sockaddr *sa, UInt salen )
-{
-   Char *outmsg = VG_(malloc) ( VG_AR_TRANSIENT, strlen( description ) + 30 );
-
-   VG_(sprintf) ( outmsg, description, ".sa_family" );
-   must_be_readable( tst, outmsg, (UInt) &sa->sa_family, sizeof (sa_family_t));
-               
-   switch (sa->sa_family) {
-                  
-      case AF_UNIX:
-         VG_(sprintf) ( outmsg, description, ".sun_path" );
-         must_be_readable_asciiz( tst, outmsg,
-            (UInt) ((struct sockaddr_un *) sa)->sun_path);
-         break;
-                     
-      case AF_INET:
-         VG_(sprintf) ( outmsg, description, ".sin_port" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in *) sa)->sin_port,
-            sizeof (((struct sockaddr_in *) sa)->sin_port));
-         VG_(sprintf) ( outmsg, description, ".sin_addr" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in *) sa)->sin_addr,
-            sizeof (struct in_addr));
-         break;
-                           
-      case AF_INET6:
-         VG_(sprintf) ( outmsg, description, ".sin6_port" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in6 *) sa)->sin6_port,
-            sizeof (((struct sockaddr_in6 *) sa)->sin6_port));
-         VG_(sprintf) ( outmsg, description, ".sin6_flowinfo" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in6 *) sa)->sin6_flowinfo,
-            sizeof (uint32_t));
-         VG_(sprintf) ( outmsg, description, ".sin6_addr" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in6 *) sa)->sin6_addr,
-            sizeof (struct in6_addr));
-#        ifndef GLIBC_2_1
-         VG_(sprintf) ( outmsg, description, ".sin6_scope_id" );
-         must_be_readable( tst, outmsg,
-            (UInt) &((struct sockaddr_in6 *) sa)->sin6_scope_id,
-            sizeof (uint32_t));
-#        endif
-         break;
-               
-      default:
-         VG_(sprintf) ( outmsg, description, "" );
-         must_be_readable( tst, outmsg, (UInt) sa, salen );
-         break;
-   }
-   
-   VG_(free) ( VG_AR_TRANSIENT, outmsg );
-}
-
-
-/* Records the current end of the data segment so we can make sense of
-   calls to brk().  Initial value set by hdm_init_memory_audit(). */
-Addr VGM_(curr_dataseg_end);
-
-
-
-/* The Main Entertainment ... */
-
-void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
-{
-   ThreadState* tst;
-   Bool         sane_before_call, sane_after_call;
-   UInt         syscallno, arg1, arg2, arg3, arg4, arg5;
-   /* Do not make this unsigned! */
-   Int res;
-
-   VGP_PUSHCC(VgpSyscall);
-
-   vg_assert(VG_(is_valid_tid)(tid));
-   sane_before_call = True;
-   sane_after_call  = True;
-   tst              = & VG_(threads)[tid];
-   syscallno        = tst->m_eax;
-   arg1             = tst->m_ebx;
-   arg2             = tst->m_ecx;
-   arg3             = tst->m_edx;
-   arg4             = tst->m_esi;
-   arg5             = tst->m_edi;
-
-   /* Since buggy syscall wrappers sometimes break this, we may as well 
-      check ourselves. */
-   if (! VG_(first_and_last_secondaries_look_plausible)())
-      sane_before_call = False;
-
-   /* the syscall no is in %eax.  For syscalls with <= 5 args,
-      args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi.
-      For calls with > 5 args, %ebx points to a lump of memory
-      containing the args.
-
-      The result is returned in %eax.  If this value >= 0, the call
-      succeeded, and this is the return value.  If < 0, it failed, and
-      the negation of this value is errno.  To be more specific, 
-      if res is in the range -EMEDIUMTYPE (-124) .. -EPERM (-1)
-      (kernel 2.4.9 sources, include/asm-i386/errno.h)
-      then it indicates an error.  Otherwise it doesn't.
-
-      Dirk Mueller (mueller@kde.org) says that values -4095 .. -1
-      (inclusive?) indicate error returns.  Not sure where the -4095
-      comes from.
-   */
-
-   if (VG_(clo_trace_syscalls))
-      VG_(printf)("SYSCALL[%d,%d](%3d): ", 
-                  VG_(getpid)(), tid, syscallno);
-
-   switch (syscallno) {
-
-      case __NR_exit:
-         VG_(panic)("syscall exit() not caught by the scheduler?!");
-         break;
-
-      case __NR_clone:
-         VG_(unimplemented)
-            ("clone(): not supported by Valgrind.\n   "
-             "We do now support programs linked against\n   "
-             "libpthread.so, though.  Re-run with -v and ensure that\n   "
-             "you are picking up Valgrind's implementation of libpthread.so.");
-         break;
-
-#     if defined(__NR_modify_ldt)
-      case __NR_modify_ldt:
-         VG_(nvidia_moan)();
-         VG_(unimplemented)
-            ("modify_ldt(): I (JRS) haven't investigated this yet; sorry.");
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
-
-#     if defined(__NR_getxattr)
-      case __NR_getxattr: /* syscall 229 */
-         /* ssize_t getxattr (const char *path, const char* name,
-                              void* value, size_t size); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getxattr ( %p, %p, %p, %d )\n", 
-                        arg1,arg2,arg3, arg4);
-         must_be_readable_asciiz( tst, "getxattr(path)", arg1 );
-         must_be_readable_asciiz( tst, "getxattr(name)", arg2 );
-         must_be_writable( tst, "getxattr(value)", arg3, arg4 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0 
-                                  && arg3 != (Addr)NULL) {
-            make_readable( arg3, res );
-         }
-         break;
-#     endif
-      
-#     if defined(__NR_quotactl)
-      case __NR_quotactl: /* syscall 131 */
-         /* int quotactl(int cmd, char *special, int uid, caddr_t addr); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
-                        arg1,arg2,arg3, arg4);
-         must_be_readable_asciiz( tst, "quotactl(special)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_truncate64)
-      case __NR_truncate64: /* syscall 193 */
-         /* int truncate64(const char *path, off64_t length); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("truncate64 ( %p, %lld )\n",
-                        arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
-         must_be_readable_asciiz( tst, "truncate64(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_fdatasync)
-      case __NR_fdatasync: /* syscall 148 */
-         /* int fdatasync(int fd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fdatasync ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_msync) /* syscall 144 */
-      case __NR_msync:
-         /* int msync(const void *start, size_t length, int flags); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
-         must_be_readable( tst, "msync(start)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);  
-         break;
-#     endif
-
-#     if defined(__NR_getpmsg) /* syscall 188 */
-      case __NR_getpmsg: 
-      {
-      /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
-      /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
-                             int *bandp, int *flagsp); */
-      struct strbuf {
-         int     maxlen;         /* no. of bytes in buffer */
-         int     len;            /* no. of bytes returned */
-         caddr_t buf;            /* pointer to data */
-      };
-      struct strbuf *ctrl;
-      struct strbuf *data;
-      if (VG_(clo_trace_syscalls))
-          VG_(printf)("getpmsg ( %d, %p, %p, %p, %p )\n",
-                      arg1,arg2,arg3,arg4,arg5);
-      ctrl = (struct strbuf *)arg2;
-      data = (struct strbuf *)arg3;
-      if (ctrl && ctrl->maxlen > 0)
-          must_be_writable(tst, "getpmsg(ctrl)", 
-                                (UInt)ctrl->buf, ctrl->maxlen);
-      if (data && data->maxlen > 0)
-          must_be_writable(tst, "getpmsg(data)", 
-                                 (UInt)data->buf, data->maxlen);
-      if (arg4)
-          must_be_writable(tst, "getpmsg(bandp)", 
-                                (UInt)arg4, sizeof(int));
-      if (arg5)
-          must_be_writable(tst, "getpmsg(flagsp)", 
-                                (UInt)arg5, sizeof(int));
-      KERNEL_DO_SYSCALL(tid,res);
-      if (!VG_(is_kerror)(res) && res == 0 && ctrl && ctrl->len > 0) {
-         make_readable( (UInt)ctrl->buf, ctrl->len);
-      }
-      if (!VG_(is_kerror)(res) && res == 0 && data && data->len > 0) {
-         make_readable( (UInt)data->buf, data->len);
-      }
-      }
-      break;
-#     endif
-
-
-#     if defined(__NR_putpmsg) /* syscall 189 */
-      case __NR_putpmsg: 
-      {
-      /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
-      /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
-                             int band, int flags); */
-      struct strbuf {
-         int     maxlen;         /* no. of bytes in buffer */
-         int     len;            /* no. of bytes returned */
-         caddr_t buf;            /* pointer to data */
-      };
-      struct strbuf *ctrl;
-      struct strbuf *data;
-      if (VG_(clo_trace_syscalls))
-         VG_(printf)("putpmsg ( %d, %p, %p, %d, %d )\n",
-                     arg1,arg2,arg3,arg4,arg5);
-      ctrl = (struct strbuf *)arg2;
-      data = (struct strbuf *)arg3;
-      if (ctrl && ctrl->len > 0)
-          must_be_readable(tst, "putpmsg(ctrl)",
-                                (UInt)ctrl->buf, ctrl->len);
-      if (data && data->len > 0)
-          must_be_readable(tst, "putpmsg(data)",
-                                (UInt)data->buf, data->len);
-      KERNEL_DO_SYSCALL(tid,res);
-      }
-      break;
-#     endif
-
-      case __NR_getitimer: /* syscall 105 */
-         /* int getitimer(int which, struct itimerval *value); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getitimer ( %d, %p )\n", arg1, arg2);
-         must_be_writable( tst, "getitimer(timer)", arg2, 
-                           sizeof(struct itimerval) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) {
-            make_readable(arg2, sizeof(struct itimerval));
-         }
-         break;
-
-#     if defined(__NR_syslog)
-      case __NR_syslog: /* syscall 103 */
-         /* int syslog(int type, char *bufp, int len); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
-         switch(arg1) {
-            case 2: case 3: case 4:
-               must_be_writable( tst, "syslog(buf)", arg2, arg3);
-	       break;
-            default: 
-               break;
-         }
-         KERNEL_DO_SYSCALL(tid, res);
-         if (!VG_(is_kerror)(res)) {
-            switch (arg1) {
-               case 2: case 3: case 4:
-                  make_readable( arg2, arg3 );
-                  break;
-               default:
-                  break;
-            }
-         }
-         break;
-#     endif
-
-      case __NR_personality: /* syscall 136 */
-         /* int personality(unsigned long persona); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("personality ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_chroot: /* syscall 61 */
-         /* int chroot(const char *path); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("chroot ( %p )\n", arg1);
-         must_be_readable_asciiz( tst, "chroot(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_madvise)
-      case __NR_madvise: /* syscall 219 */
-         /* int madvise(void *start, size_t length, int advice ); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mremap)
-      /* Is this really right?  Perhaps it should copy the permissions
-         from the old area into the new.  Unclear from the Linux man
-         pages what this really does.  Also, the flags don't look like
-         they mean the same as the standard mmap flags, so that's
-         probably wrong too. */
-      case __NR_mremap: /* syscall 163 */
-         /* void* mremap(void * old_address, size_t old_size, 
-                         size_t new_size, unsigned long flags); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mremap ( %p, %d, %d, 0x%x )\n", 
-                        arg1, arg2, arg3, arg4);
-         must_be_writable ( tst, "mremap(old_address)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            /* Copied from munmap() wrapper. */
-            Bool munmap_exe;
-            Addr start  = arg1;
-            Addr length = arg2;
-            while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
-            while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
-            make_noaccess( start, length );
-            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
-            if (munmap_exe)
-               VG_(invalidate_translations) ( start, length );
-            approximate_mmap_permissions( (Addr)res, arg3, arg4 );
-         }
-         break;         
-#     endif
-
-      case __NR_nice: /* syscall 34 */
-         /* int nice(int inc); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("nice ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! */
-
-#     if defined(__NR_setresgid32)
-      case __NR_setresgid32: /* syscall 210 */
-         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsuid32)
-      case __NR_setfsuid32: /* syscall 215 */
-         /* int setfsuid(uid_t fsuid); */
-          if (VG_(clo_trace_syscalls))
-             VG_(printf)("setfsuid ( %d )\n", arg1);
-          KERNEL_DO_SYSCALL(tid,res);
-          break;
-#     endif
-
-#     if defined(__NR__sysctl)
-      case __NR__sysctl:
-      /* int _sysctl(struct __sysctl_args *args); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("_sysctl ( %p )\n", arg1 );
-         must_be_writable ( tst, "_sysctl(args)", arg1, 
-                            sizeof(struct __sysctl_args) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable ( arg1, sizeof(struct __sysctl_args) );
-         break;
-#     endif
-
-#     if defined(__NR_sched_getscheduler)
-      case __NR_sched_getscheduler:
-         /* int sched_getscheduler(pid_t pid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_getscheduler ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_sched_setscheduler)
-      case __NR_sched_setscheduler:
-         /* int sched_setscheduler(pid_t pid, int policy, 
-                const struct sched_param *p); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
-         if (arg3 != (UInt)NULL)
-            must_be_readable( tst,
-                              "sched_setscheduler(struct sched_param *p)", 
-                              arg3, sizeof(struct sched_param));
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mlock)
-      case __NR_mlock:
-         /* int mlock(const void * addr, size_t len) */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mlock ( %p, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_mlockall)
-      case __NR_mlockall:
-         /* int mlockall(int flags); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mlockall ( %x )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_munlockall)
-      case __NR_munlockall:
-         /* int munlockall(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("munlockall ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_sched_get_priority_max)
-      case __NR_sched_get_priority_max:
-         /* int sched_get_priority_max(int policy); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_get_priority_max ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_sched_get_priority_min)
-      case __NR_sched_get_priority_min: /* syscall 160 */
-         /* int sched_get_priority_min(int policy); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_get_priority_min ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_setpriority)
-      case __NR_setpriority: /* syscall 97 */
-         /* int setpriority(int which, int who, int prio); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#if   defined(__NR_getpriority)
-      case __NR_getpriority: /* syscall 96 */
-         /* int getpriority(int which, int who); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getpriority ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsgid)
-      case __NR_setfsgid: /* syscall 139 */
-         /* int setfsgid(gid_t gid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setfsgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setregid)
-      case __NR_setregid: /* syscall 71 */
-         /* int setregid(gid_t rgid, gid_t egid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setregid ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setresuid)
-      case __NR_setresuid: /* syscall 164 */
-         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setfsuid)
-      case __NR_setfsuid: /* syscall 138 */
-         /* int setfsuid(uid_t uid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setfsuid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */
-
-#     if defined(__NR_sendfile)
-      case __NR_sendfile: /* syscall 187 */
-         /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
-                             size_t count) */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
-         if (arg3 != (UInt)NULL)
-            must_be_writable( tst, "sendfile(offset)", arg3, sizeof(off_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL) {
-            make_readable( arg3, sizeof( off_t ) );
-         }
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */
-
-#     if defined(__NR_pwrite)
-      case __NR_pwrite: /* syscall 181 */
-         /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
-                            off_t offset); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
-         must_be_readable( tst, "pwrite(buf)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! */
-
-      case __NR_sync: /* syscall 36 */
-         /* int sync(); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sync ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break; 
- 
-      case __NR_fstatfs: /* syscall 100 */
-         /* int fstatfs(int fd, struct statfs *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fstatfs ( %d, %p )\n",arg1,arg2);
-         must_be_writable( tst, "stat(buf)", arg2, sizeof(struct statfs) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct statfs) );
-         break;
-
-      /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */
-
-      case __NR_pause: /* syscall 29 */
-         /* int pause(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("pause ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getsid: /* syscall 147 */
-         /* pid_t getsid(pid_t pid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getsid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_pread)
-      case __NR_pread: /* syscall 180 */
-         /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
-         must_be_writable( tst, "pread(buf)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
-                        VG_(getpid)(),
-                        arg1, arg2, arg3, arg4, res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            make_readable( arg2, res );
-         }
-         break;
-#     endif
-
-      /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! */
-
-      case __NR_mknod: /* syscall 14 */
-         /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
-         must_be_readable_asciiz( tst, "mknod(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_flock: /* syscall 143 */
-         /* int flock(int fd, int operation); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("flock ( %d, %d )\n", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_rt_sigsuspend)
-      /* Viewed with great suspicion by me, but, hey, let's do it
-         anyway ... */
-      case __NR_rt_sigsuspend: /* syscall 179 */
-         /* int sigsuspend(const sigset_t *mask); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("sigsuspend ( %p )\n", arg1 );
-         if (arg1 != (Addr)NULL) {
-            /* above NULL test is paranoia */
-            must_be_readable( tst, "sigsuspend(mask)", arg1, 
-                              sizeof(vki_ksigset_t) );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_init_module: /* syscall 128 */
-         /* int init_module(const char *name, struct module *image); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("init_module ( %p, %p )\n", arg1, arg2 );
-         must_be_readable_asciiz( tst, "init_module(name)", arg1 );
-         must_be_readable( tst, "init_module(image)", arg2, 
-                           VKI_SIZEOF_STRUCT_MODULE );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_ioperm: /* syscall 101 */
-         /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_capget: /* syscall 184 */
-         /* int capget(cap_user_header_t header, cap_user_data_t data); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("capget ( %p, %p )\n", arg1, arg2 );
-         must_be_readable( tst, "capget(header)", arg1, 
-                                             sizeof(vki_cap_user_header_t) );
-         must_be_writable( tst, "capget(data)", arg2, 
-                                           sizeof( vki_cap_user_data_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL)
-            make_readable ( arg2, sizeof( vki_cap_user_data_t) );
-         break;
-
-      /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */
-
-      case __NR_execve:
-         /* int execve (const char *filename, 
-                        char *const argv [], 
-                        char *const envp[]); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
-                        arg1, arg1, arg2, arg3);
-         /* Resistance is futile.  Nuke all other threads.  POSIX
-            mandates this. */
-            VG_(nuke_all_threads_except)( tid );
-         /* Make any binding for LD_PRELOAD disappear, so that child
-            processes don't get traced into. */
-         if (!VG_(clo_trace_children)) {
-            Int i;
-            Char** envp = (Char**)arg3;
-            Char*  ld_preload_str = NULL;
-            Char*  ld_library_path_str = NULL;
-            for (i = 0; envp[i] != NULL; i++) {
-               if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
-                  ld_preload_str = &envp[i][11];
-               if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
-                  ld_library_path_str = &envp[i][16];
-            }
-            VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
-	       ld_preload_str, ld_library_path_str );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         /* Should we still be alive here?  Don't think so. */
-         /* Actually, above comment is wrong.  execve can fail, just
-            like any other syscall -- typically the file to exec does
-            not exist.  Hence: */
-         vg_assert(VG_(is_kerror)(res));
-         break;
-
-      /* !!!!!!!!!!!!!!!!!!!!!     end     !!!!!!!!!!!!!!!!!!!!! */
-
-      case __NR_access: /* syscall 33 */
-         /* int access(const char *pathname, int mode); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("access ( %p, %d )\n", arg1,arg2);
-         must_be_readable_asciiz( tst, "access(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_alarm: /* syscall 27 */
-         /* unsigned int alarm(unsigned int seconds); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("alarm ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_brk: /* syscall 45 */
-         /* Haven't a clue if this is really right. */
-         /* int brk(void *end_data_segment); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("brk ( %p ) --> ",arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("0x%x\n", res);
-
-         if (!VG_(is_kerror)(res)) {
-            if (arg1 == 0) {
-               /* Just asking where the current end is. (???) */
-               VGM_(curr_dataseg_end) = res;
-            } else
-            if (arg1 < VGM_(curr_dataseg_end)) {
-               /* shrinking the data segment. */
-               make_noaccess( (Addr)arg1, 
-                              VGM_(curr_dataseg_end)-arg1 );
-               VGM_(curr_dataseg_end) = arg1;
-            } else
-            if (arg1 > VGM_(curr_dataseg_end) && res != 0) {
-               /* asked for more memory, and got it */
-               /* 
-               VG_(printf)("BRK: new area %x .. %x\n", 
-                           VGM_(curr_dataseg_end, arg1-1 );
-               */
-               make_writable ( (Addr)VGM_(curr_dataseg_end), 
-                               arg1-VGM_(curr_dataseg_end) );
-               VGM_(curr_dataseg_end) = arg1;         
-            }
-         }
-         break;
-
-      case __NR_chdir: /* syscall 12 */
-         /* int chdir(const char *path); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("chdir ( %p )\n", arg1);
-         must_be_readable_asciiz( tst, "chdir(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_chmod: /* syscall 15 */
-         /* int chmod(const char *path, mode_t mode); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("chmod ( %p, %d )\n", arg1,arg2);
-         must_be_readable_asciiz( tst, "chmod(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_chown32)
-      case __NR_chown32: /* syscall 212 */
-#     endif
-#     if defined(__NR_lchown32)
-      case __NR_lchown32: /* syscall 198 */
-#     endif
-      case __NR_chown: /* syscall 16 */
-         /* int chown(const char *path, uid_t owner, gid_t group); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
-         must_be_readable_asciiz( tst, "chown(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_close: /* syscall 6 */
-         /* int close(int fd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("close ( %d )\n",arg1);
-         /* Detect and negate attempts by the client to close Valgrind's
-            logfile fd ... */
-         if (arg1 == VG_(clo_logfile_fd)) {
-            VG_(message)(Vg_UserMsg, 
-              "Warning: client attempted to close "
-               "Valgrind's logfile fd (%d).", 
-               VG_(clo_logfile_fd));
-            VG_(message)(Vg_UserMsg, 
-              "   Use --logfile-fd=<number> to select an "
-              "alternative logfile fd." );
-         } else {
-            KERNEL_DO_SYSCALL(tid,res);
-         }
-         break;
-
-      case __NR_dup: /* syscall 41 */
-         /* int dup(int oldfd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("dup ( %d ) --> ", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("%d\n", res);
-         break;
-
-      case __NR_dup2: /* syscall 63 */
-         /* int dup2(int oldfd, int newfd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("dup2 ( %d, %d ) ...\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
-                        VG_(getpid)(), 
-                        arg1, arg2, res);
-         break;
-
-      case __NR_fcntl: /* syscall 55 */
-         /* int fcntl(int fd, int cmd, int arg); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_fchdir: /* syscall 133 */
-         /* int fchdir(int fd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fchdir ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_fchown32)
-      case __NR_fchown32: /* syscall 207 */
-#     endif
-      case __NR_fchown: /* syscall 95 */
-         /* int fchown(int filedes, uid_t owner, gid_t group); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_fchmod: /* syscall 94 */
-         /* int fchmod(int fildes, mode_t mode); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fchmod ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_fcntl64)
-      case __NR_fcntl64: /* syscall 221 */
-         /* I don't know what the prototype for this is supposed to be. */
-         /* ??? int fcntl(int fd, int cmd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_fstat: /* syscall 108 */
-         /* int fstat(int filedes, struct stat *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fstat ( %d, %p )\n",arg1,arg2);
-         must_be_writable( tst, "fstat", arg2, sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct stat) );
-         break;
-
-      case __NR_vfork: /* syscall 190 */
-         /* pid_t vfork(void); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("vfork ( ) ... becomes ... ");
-         /* KLUDGE: we prefer to do a fork rather than vfork. 
-            vfork gives a SIGSEGV, and the stated semantics looks
-            pretty much impossible for us. */
-         tst->m_eax = __NR_fork;
-         /* fall through ... */
-      case __NR_fork: /* syscall 2 */
-         /* pid_t fork(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fork ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         if (res == 0) {
-            /* I am the child.  Nuke all other threads which I might
-               have inherited from my parent.  POSIX mandates this. */
-            VG_(nuke_all_threads_except)( tid );
-         }
-         break;
-
-      case __NR_fsync: /* syscall 118 */
-         /* int fsync(int fd); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fsync ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_ftruncate: /* syscall 93 */
-         /* int ftruncate(int fd, size_t length); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("ftruncate ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_ftruncate64)
-      case __NR_ftruncate64: /* syscall 194 */
-         /* int ftruncate64(int fd, off64_t length); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("ftruncate64 ( %d, %lld )\n", 
-                        arg1,arg2|((long long) arg3 << 32));
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getdents: /* syscall 141 */
-         /* int getdents(unsigned int fd, struct dirent *dirp, 
-                         unsigned int count); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
-         must_be_writable( tst, "getdents(dirp)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0)
-            make_readable( arg2, res );
-         break;
-
-#     if defined(__NR_getdents64)
-      case __NR_getdents64: /* syscall 220 */
-         /* int getdents(unsigned int fd, struct dirent64 *dirp, 
-                         unsigned int count); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
-         must_be_writable( tst, "getdents64(dirp)", arg2, arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0)
-            make_readable( arg2, res );
-         break;
-#     endif
-
-#     if defined(__NR_getgroups32)
-      case __NR_getgroups32: /* syscall 205 */
-#     endif
-      case __NR_getgroups: /* syscall 80 */
-         /* int getgroups(int size, gid_t list[]); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getgroups ( %d, %p )\n", arg1, arg2);
-         if (arg1 > 0)
-            must_be_writable ( tst, "getgroups(list)", arg2, 
-                               arg1 * sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0)
-            make_readable ( arg2, res * sizeof(gid_t) );
-         break;
-
-      case __NR_getcwd: /* syscall 183 */
-         /* char *getcwd(char *buf, size_t size); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getcwd ( %p, %d )\n",arg1,arg2);
-         must_be_writable( tst, "getcwd(buf)", arg1, arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res != (Addr)NULL)
-            make_readable ( arg1, arg2 );
-         /* Not really right -- really we should have the asciiz
-            string starting at arg1 readable, or up to arg2 bytes,
-            whichever finishes first. */
-         break;
-
-      case __NR_geteuid: /* syscall 49 */
-         /* uid_t geteuid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("geteuid ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_geteuid32)
-      case __NR_geteuid32: /* syscall 201 */
-         /* ?? uid_t geteuid32(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("geteuid32(?) ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getegid: /* syscall 50 */
-         /* gid_t getegid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getegid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getegid32)
-      case __NR_getegid32: /* syscall 202 */
-         /* gid_t getegid32(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getegid32 ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getgid: /* syscall 47 */
-         /* gid_t getgid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getgid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getgid32)
-      case __NR_getgid32: /* syscall 200 */
-         /* gid_t getgid32(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getgid32 ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_getpid: /* syscall 20 */
-         /* pid_t getpid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getpid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getpgid: /* syscall 132 */
-         /* pid_t getpgid(pid_t pid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getpgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getpgrp: /* syscall 65 */
-         /* pid_t getpprp(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getpgrp ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getppid: /* syscall 64 */
-         /* pid_t getppid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getppid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_getresgid: /* syscall 171 */
-         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
-         must_be_writable ( tst, "getresgid(rgid)", arg1, sizeof(gid_t) );
-         must_be_writable ( tst, "getresgid(egid)", arg2, sizeof(gid_t) );
-         must_be_writable ( tst, "getresgid(sgid)", arg3, sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable ( arg1, sizeof(gid_t) );
-            make_readable ( arg2, sizeof(gid_t) );
-            make_readable ( arg3, sizeof(gid_t) );
-         }
-         break;
-
-#     if defined(__NR_getresgid32)
-      case __NR_getresgid32: /* syscall 211 */
-         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
-         must_be_writable ( tst, "getresgid32(rgid)", arg1, sizeof(gid_t) );
-         must_be_writable ( tst, "getresgid32(egid)", arg2, sizeof(gid_t) );
-         must_be_writable ( tst, "getresgid32(sgid)", arg3, sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable ( arg1, sizeof(gid_t) );
-            make_readable ( arg2, sizeof(gid_t) );
-            make_readable ( arg3, sizeof(gid_t) );
-         }
-         break;
-#     endif
-
-      case __NR_getresuid: /* syscall 165 */
-         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
-         must_be_writable ( tst, "getresuid(ruid)", arg1, sizeof(uid_t) );
-         must_be_writable ( tst, "getresuid(euid)", arg2, sizeof(uid_t) );
-         must_be_writable ( tst, "getresuid(suid)", arg3, sizeof(uid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable ( arg1, sizeof(uid_t) );
-            make_readable ( arg2, sizeof(uid_t) );
-            make_readable ( arg3, sizeof(uid_t) );
-         }
-         break;
-
-#     if defined(__NR_getresuid32)
-      case __NR_getresuid32: /* syscall 209 */
-         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
-         must_be_writable ( tst, "getresuid32(ruid)", arg1, sizeof(uid_t) );
-         must_be_writable ( tst, "getresuid32(euid)", arg2, sizeof(uid_t) );
-         must_be_writable ( tst, "getresuid32(suid)", arg3, sizeof(uid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable ( arg1, sizeof(uid_t) );
-            make_readable ( arg2, sizeof(uid_t) );
-            make_readable ( arg3, sizeof(uid_t) );
-         }
-         break;
-#     endif
-
-#     if defined(__NR_ugetrlimit)
-      case __NR_ugetrlimit: /* syscall 191 */
-#     endif
-      case __NR_getrlimit: /* syscall 76 */
-         /* int getrlimit (int resource, struct rlimit *rlim); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getrlimit ( %d, %p )\n", arg1,arg2);
-         must_be_writable( tst, "getrlimit(rlim)", arg2, 
-                           sizeof(struct rlimit) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            make_readable( arg2, sizeof(struct rlimit) );
-         break;
-
-      case __NR_getrusage: /* syscall 77 */
-         /* int getrusage (int who, struct rusage *usage); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getrusage ( %d, %p )\n", arg1,arg2);
-         must_be_writable( tst, "getrusage(usage)", arg2, 
-                           sizeof(struct rusage) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            make_readable(arg2, sizeof(struct rusage) );
-         break;
-
-      case __NR_gettimeofday: /* syscall 78 */
-         /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("gettimeofday ( %p, %p )\n",arg1,arg2);
-         must_be_writable( tst, "gettimeofday(tv)", arg1, 
-                           sizeof(struct timeval) );
-         if (arg2 != 0)
-            must_be_writable( tst, "gettimeofday(tz)", arg2, 
-                              sizeof(struct timezone) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable( arg1, sizeof(struct timeval) );
-            if (arg2 != 0)
-               make_readable( arg2, sizeof(struct timezone) );
-         }
-         break;
-
-      case __NR_getuid: /* syscall 24 */
-         /* uid_t getuid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getuid ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_getuid32)
-      case __NR_getuid32: /* syscall 199 */
-         /* ???uid_t getuid32(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("getuid32 ( )\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-      case __NR_ipc: /* syscall 117 */
-         /* int ipc ( unsigned int call, int first, int second, 
-                      int third, void *ptr, long fifth); */
-         {
-         UInt arg6 = tst->m_ebp;
-
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("ipc ( %d, %d, %d, %d, %p, %d )\n",
-                        arg1,arg2,arg3,arg4,arg5,arg6);
-         switch (arg1 /* call */) {
-            case 1: /* IPCOP_semop */
-               must_be_readable ( tst, "semop(sops)", arg5, 
-                                  arg3 * sizeof(struct sembuf) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 2: /* IPCOP_semget */
-            case 3: /* IPCOP_semctl */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 11: /* IPCOP_msgsnd */
-               {
-                  struct msgbuf *msgp = (struct msgbuf *)arg5;
-                  Int msgsz = arg3;
-
-                  must_be_readable ( tst, "msgsnd(msgp->mtype)", 
-                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
-                  must_be_readable ( tst, "msgsnd(msgp->mtext)", 
-                                     (UInt)msgp->mtext, msgsz );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-                  break;
-               }
-            case 12: /* IPCOP_msgrcv */
-               {
-                  struct msgbuf *msgp;
-                  Int msgsz = arg3;
- 
-                  msgp = (struct msgbuf *)safe_dereference( 
-                            (Addr) (&((struct ipc_kludge *)arg5)->msgp), 0 );
-
-                  must_be_writable ( tst, "msgrcv(msgp->mtype)", 
-                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
-                  must_be_writable ( tst, "msgrcv(msgp->mtext)", 
-                                     (UInt)msgp->mtext, msgsz );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( !VG_(is_kerror)(res) && res > 0 ) {
-                     make_readable ( (UInt)&msgp->mtype, sizeof(msgp->mtype) );
-                     make_readable ( (UInt)msgp->mtext, res );
-                  }
-                  break;
-               }
-            case 13: /* IPCOP_msgget */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case 14: /* IPCOP_msgctl */
-               {
-                  switch (arg3 /* cmd */) {
-                     case IPC_STAT:
-                        must_be_writable ( tst, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        if ( !VG_(is_kerror)(res) && res > 0 ) {
-                           make_readable ( arg5, sizeof(struct msqid_ds) );
-                        }
-                        break;
-                     case IPC_SET:
-                        must_be_readable ( tst, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-#                    if defined(IPC_64)
-                     case IPC_STAT|IPC_64:
-                        must_be_writable ( tst, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid64_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        if ( !VG_(is_kerror)(res) && res > 0 ) {
-                           make_readable ( arg5, sizeof(struct msqid64_ds) );
-                        }
-                        break;
-#                    endif
-#                    if defined(IPC_64)
-                     case IPC_SET|IPC_64:
-                        must_be_readable ( tst, "msgctl(buf)", arg5, 
-                                           sizeof(struct msqid64_ds) );
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-#                    endif
-                     default:
-                        KERNEL_DO_SYSCALL(tid,res);
-                        break;
-                  }
-                  break;
-               }
-            case 21: /* IPCOP_shmat */
-               {
-                  Int shmid = arg2;
-                  Int shmflag = arg3;
-                  UInt addr;
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( VG_(is_kerror) ( res ) )
-                     break;
-                  
-                  /* force readability. before the syscall it is
-                   * indeed uninitialized, as can be seen in
-                   * glibc/sysdeps/unix/sysv/linux/shmat.c */
-                  make_readable ( arg4, sizeof( ULong ) );
-
-                  addr = safe_dereference ( arg4, 0 );
-                  if ( addr > 0 ) { 
-                     UInt segmentSize = get_shm_size ( shmid );
-                     if ( segmentSize > 0 ) {
-                        if ( shmflag & SHM_RDONLY )
-                           make_readable ( addr, segmentSize );
-                        else
-                           make_readwritable ( addr, segmentSize );
-                     }
-                  }
-                  break;
-               }
-            case 22: /* IPCOP_shmdt */
-                  KERNEL_DO_SYSCALL(tid,res);
-                  /* ### FIXME: this should call make_noaccess on the
-                   * area passed to shmdt. But there's no way to
-                   * figure out the size of the shared memory segment
-                   * just from the address...  Maybe we want to keep a
-                   * copy of the exiting mappings inside valgrind? */
-                  break;
-            case 23: /* IPCOP_shmget */
-                KERNEL_DO_SYSCALL(tid,res);
-                break;
-            case 24: /* IPCOP_shmctl */
-	      /* Subject: shmctl: The True Story
-                    Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
-                    From: Reuben Thomas <rrt@mupsych.org>
-                      To: Julian Seward <jseward@acm.org>
-
-                 1. As you suggested, the syscall subop is in arg1.
-
-                 2. There are a couple more twists, so the arg order
-                    is actually:
-
-                 arg1 syscall subop
-                 arg2 file desc
-                 arg3 shm operation code (can have IPC_64 set)
-                 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
-                        is defined?
-                 arg5 pointer to buffer
-
-                 3. With this in mind, I've amended the case as below:
-	      */
-               {
-                  UInt cmd = arg3;
-                  Bool out_arg = False;
-                  if ( arg5 ) {
-#                    if defined(IPC_64)
-                     cmd = cmd & (~IPC_64);
-#                    endif
-                     out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
-                     if ( out_arg )
-                        must_be_writable( tst, 
-                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
-                           arg5, sizeof(struct shmid_ds) );
-                     else
-                        must_be_readable( tst, 
-                           "shmctl(SHM_XXXX,buf)", 
-                           arg5, sizeof(struct shmid_ds) );
-                  }
-                  KERNEL_DO_SYSCALL(tid,res);
-                  if ( arg5 && !VG_(is_kerror)(res) && res == 0 && out_arg )
-                          make_readable( arg5, sizeof(struct shmid_ds) );
-               }
-               break;
-            default:
-               VG_(message)(Vg_DebugMsg,
-                            "FATAL: unhandled syscall(ipc) %d",
-                            arg1 );
-               VG_(panic)("... bye!\n");
-               break; /*NOTREACHED*/
-         }
-         }
-         break;
-
-      case __NR_ioctl: /* syscall 54 */
-         /* int ioctl(int d, int request, ...)
-            [The  "third"  argument  is traditionally char *argp, 
-             and will be so named for this discussion.]
-         */
-         /*
-         VG_(message)(
-            Vg_DebugMsg, 
-            "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
-            arg2,arg1,arg3);
-         */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
-         switch (arg2 /* request */) {
-            case TCSETS:
-            case TCSETSW:
-            case TCSETSF:
-               must_be_readable( tst, "ioctl(TCSET{S,SW,SF})", arg3, 
-                                 VKI_SIZEOF_STRUCT_TERMIOS );
-               KERNEL_DO_SYSCALL(tid,res);
-               break; 
-            case TCGETS:
-               must_be_writable( tst, "ioctl(TCGETS)", arg3, 
-                                 VKI_SIZEOF_STRUCT_TERMIOS );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, VKI_SIZEOF_STRUCT_TERMIOS );
-               break;
-            case TCSETA:
-            case TCSETAW:
-            case TCSETAF:
-               must_be_readable( tst, "ioctl(TCSET{A,AW,AF})", arg3,
-                                 VKI_SIZEOF_STRUCT_TERMIO );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TCGETA:
-               must_be_writable( tst, "ioctl(TCGETA)", arg3,
-                                 VKI_SIZEOF_STRUCT_TERMIO );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, VKI_SIZEOF_STRUCT_TERMIO );
-               break;
-            case TCSBRK:
-            case TCXONC:
-            case TCSBRKP:
-            case TCFLSH:
-               /* These just take an int by value */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCGWINSZ:
-               must_be_writable( tst, "ioctl(TIOCGWINSZ)", arg3, 
-                                 sizeof(struct winsize) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, sizeof(struct winsize) );
-               break;
-            case TIOCSWINSZ:
-               must_be_readable( tst, "ioctl(TIOCSWINSZ)", arg3, 
-                                 sizeof(struct winsize) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCGPGRP:
-               /* Get process group ID for foreground processing group. */
-               must_be_writable( tst, "ioctl(TIOCGPGRP)", arg3,
-                                 sizeof(pid_t) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, sizeof(pid_t) );
-               break;
-            case TIOCSPGRP:
-               /* Set a process group ID? */
-               must_be_writable( tst, "ioctl(TIOCGPGRP)", arg3,
-                                 sizeof(pid_t) );
-               KERNEL_DO_SYSCALL(tid,res); 
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, sizeof(pid_t) );
-               break;
-            case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
-               must_be_writable(tst, "ioctl(TIOCGPTN)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                   make_readable ( arg3, sizeof(int));
-               break;
-            case TIOCSCTTY:
-               /* Just takes an int value.  */
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case TIOCSPTLCK: /* Lock/unlock Pty */
-               must_be_readable( tst, "ioctl(TIOCSPTLCK)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIONBIO:
-               must_be_readable( tst, "ioctl(FIONBIO)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIOASYNC:
-               must_be_readable( tst, "ioctl(FIOASYNC)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case FIONREAD:
-               must_be_writable( tst, "ioctl(FIONREAD)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable( arg3, sizeof(int) );
-               break;
-
-            /* If you get compilation problems here, change the #if
-               1 to #if 0 and get rid of <scsi/sg.h> in
-               vg_unsafe.h. */
-#       if 1
-            case SG_SET_COMMAND_Q:
-               must_be_readable( tst, "ioctl(SG_SET_COMMAND_Q)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-#           if defined(SG_IO)
-            case SG_IO:
-               must_be_writable( tst, "ioctl(SG_IO)", arg3, 
-                                 sizeof(struct sg_io_hdr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct sg_io_hdr));
-               break;
-#           endif /* SG_IO */
-            case SG_GET_SCSI_ID:
-               /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
-               must_be_writable( tst, "ioctl(SG_GET_SCSI_ID)", arg3, 
-                                 sizeof(struct sg_scsi_id) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct sg_scsi_id));
-               break;
-            case SG_SET_RESERVED_SIZE:
-               must_be_readable( tst, "ioctl(SG_SET_RESERVED_SIZE)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SG_SET_TIMEOUT:
-               must_be_readable( tst, "ioctl(SG_SET_TIMEOUT)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SG_GET_RESERVED_SIZE:
-               must_be_writable( tst, "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(int));
-               break;
-            case SG_GET_TIMEOUT:
-               must_be_writable( tst, "ioctl(SG_GET_TIMEOUT)", arg3, 
-                                 sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(int));
-               break;
-            case SG_GET_VERSION_NUM:
-               must_be_readable( tst, "ioctl(SG_GET_VERSION_NUM)", 
-                                 arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-#       endif
-
-            case IIOCGETCPS:
-               /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
-                * when KERNEL was. I never saw a larger value than 64 though */
-#              ifndef ISDN_MAX_CHANNELS
-#              define ISDN_MAX_CHANNELS 64
-#              endif
-               must_be_writable( tst, "ioctl(IIOCGETCPS)", arg3,
-                                 ISDN_MAX_CHANNELS 
-                                 * 2 * sizeof(unsigned long) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, ISDN_MAX_CHANNELS 
-                                        * 2 * sizeof(unsigned long) );
-               break;
-            case IIOCNETGPN:
-               must_be_readable( tst, "ioctl(IIOCNETGPN)",
-                                 (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
-                                 sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
-               must_be_writable( tst, "ioctl(IIOCNETGPN)", arg3,
-                                 sizeof(isdn_net_ioctl_phone) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable ( arg3, sizeof(isdn_net_ioctl_phone) );
-               break;
-
-            /* These all use struct ifreq AFAIK */
-            case SIOCGIFINDEX:
-            case SIOCGIFFLAGS:        /* get flags                    */
-            case SIOCGIFHWADDR:       /* Get hardware address         */
-            case SIOCGIFMTU:          /* get MTU size                 */
-            case SIOCGIFADDR:         /* get PA address               */
-            case SIOCGIFNETMASK:      /* get network PA mask          */
-            case SIOCGIFMETRIC:       /* get metric                   */
-            case SIOCGIFMAP:          /* Get device parameters        */
-            case SIOCGIFTXQLEN:       /* Get the tx queue length      */
-            case SIOCGIFDSTADDR:      /* get remote PA address        */
-            case SIOCGIFBRDADDR:      /* get broadcast PA address     */
-            case SIOCGIFNAME:         /* get iface name               */
-               must_be_writable(tst, "ioctl(SIOCGIFINDEX)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct ifreq));
-               break;
-            case SIOCGIFCONF:         /* get iface list               */
-               /* WAS:
-               must_be_writable("ioctl(SIOCGIFCONF)", arg3, 
-                                sizeof(struct ifconf));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct ifconf));
-               */
-               must_be_readable(tst, "ioctl(SIOCGIFCONF)", arg3, 
-                                sizeof(struct ifconf));
-               if ( arg3 ) {
-                  // TODO len must be readable and writable
-                  // buf pointer only needs to be readable
-                  struct ifconf *ifc = (struct ifconf *) arg3;
-                  must_be_writable(tst, "ioctl(SIOCGIFCONF).ifc_buf",
-                                   (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0 && arg3 ) {
-                  struct ifconf *ifc = (struct ifconf *) arg3;
-                  if (ifc->ifc_buf != NULL)
-                     make_readable ( (Addr)(ifc->ifc_buf), 
-                                     (UInt)(ifc->ifc_len) );
-               }
-               break;
-            case SIOCGSTAMP:
-               must_be_writable(tst, "ioctl(SIOCGSTAMP)", arg3, 
-                                sizeof(struct timeval));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct timeval));
-               break;
-            case SIOCGRARP:           /* get RARP table entry         */
-            case SIOCGARP:            /* get ARP table entry          */
-               must_be_writable(tst, "ioctl(SIOCGARP)", arg3, 
-                                sizeof(struct arpreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(struct arpreq));
-               break;
-                    
-            case SIOCSIFFLAGS:        /* set flags                    */
-            case SIOCSIFMAP:          /* Set device parameters        */
-            case SIOCSIFTXQLEN:       /* Set the tx queue length      */
-            case SIOCSIFDSTADDR:      /* set remote PA address        */
-            case SIOCSIFBRDADDR:      /* set broadcast PA address     */
-            case SIOCSIFNETMASK:      /* set network PA mask          */
-            case SIOCSIFMETRIC:       /* set metric                   */
-            case SIOCSIFADDR:         /* set PA address               */
-            case SIOCSIFMTU:          /* set MTU size                 */
-            case SIOCSIFHWADDR:       /* set hardware address         */
-               must_be_readable(tst,"ioctl(SIOCSIFFLAGS)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            /* Routing table calls.  */
-            case SIOCADDRT:           /* add routing table entry      */
-            case SIOCDELRT:           /* delete routing table entry   */
-               must_be_readable(tst,"ioctl(SIOCADDRT/DELRT)", arg3, 
-                                sizeof(struct rtentry));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* RARP cache control calls. */
-            case SIOCDRARP:           /* delete RARP table entry      */
-            case SIOCSRARP:           /* set RARP table entry         */
-            /* ARP cache control calls. */
-            case SIOCSARP:            /* set ARP table entry          */
-            case SIOCDARP:            /* delete ARP table entry       */
-               must_be_readable(tst, "ioctl(SIOCSIFFLAGS)", arg3, 
-                                sizeof(struct ifreq));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SIOCSPGRP:
-               must_be_readable( tst, "ioctl(SIOCSPGRP)", arg3, sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* linux/soundcard interface (OSS) */
-            case SNDCTL_SEQ_GETOUTCOUNT:
-            case SNDCTL_SEQ_GETINCOUNT:
-            case SNDCTL_SEQ_PERCMODE:
-            case SNDCTL_SEQ_TESTMIDI:
-            case SNDCTL_SEQ_RESETSAMPLES:
-            case SNDCTL_SEQ_NRSYNTHS:
-            case SNDCTL_SEQ_NRMIDIS:
-            case SNDCTL_SEQ_GETTIME:
-            case SNDCTL_DSP_GETFMTS:
-            case SNDCTL_DSP_GETTRIGGER:
-            case SNDCTL_DSP_GETODELAY:
-#           if defined(SNDCTL_DSP_GETSPDIF)
-            case SNDCTL_DSP_GETSPDIF:
-#           endif
-            case SNDCTL_DSP_GETCAPS:
-            case SOUND_PCM_READ_RATE:
-            case SOUND_PCM_READ_CHANNELS:
-            case SOUND_PCM_READ_BITS:
-            case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
-            case SOUND_PCM_READ_FILTER:
-               must_be_writable(tst,"ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
-                                arg3,
-                                sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(int));
-               break;
-            case SNDCTL_SEQ_CTRLRATE:
-            case SNDCTL_DSP_SPEED:
-            case SNDCTL_DSP_STEREO:
-            case SNDCTL_DSP_GETBLKSIZE: 
-            case SNDCTL_DSP_CHANNELS:
-            case SOUND_PCM_WRITE_FILTER:
-            case SNDCTL_DSP_SUBDIVIDE:
-            case SNDCTL_DSP_SETFRAGMENT:
-#           if defined(SNDCTL_DSP_GETCHANNELMASK)
-            case SNDCTL_DSP_GETCHANNELMASK:
-#           endif
-#           if defined(SNDCTL_DSP_BIND_CHANNEL)
-            case SNDCTL_DSP_BIND_CHANNEL:
-#           endif
-            case SNDCTL_TMR_TIMEBASE:
-            case SNDCTL_TMR_TEMPO:
-            case SNDCTL_TMR_SOURCE:
-            case SNDCTL_MIDI_PRETIME:
-            case SNDCTL_MIDI_MPUMODE:
-               must_be_readable(tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                     "(SIOWR, int))", 
-                                arg3, sizeof(int));
-               must_be_writable(tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                     "(SIOWR, int))", 
-                                arg3, sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case SNDCTL_DSP_GETOSPACE:
-            case SNDCTL_DSP_GETISPACE:
-               must_be_writable(tst, 
-                                "ioctl(SNDCTL_XXX|SOUND_XXX "
-                                "(SIOR, audio_buf_info))", arg3,
-                                sizeof(audio_buf_info));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(audio_buf_info));
-               break;
-            case SNDCTL_DSP_SETTRIGGER:
-               must_be_readable(tst, "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
-                                arg3, sizeof(int));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            /* Real Time Clock (/dev/rtc) ioctls */
-#           ifndef GLIBC_2_1
-            case RTC_UIE_ON:
-            case RTC_UIE_OFF:
-            case RTC_AIE_ON:
-            case RTC_AIE_OFF:
-            case RTC_PIE_ON:
-            case RTC_PIE_OFF:
-            case RTC_IRQP_SET:
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case RTC_RD_TIME:
-            case RTC_ALM_READ:
-               must_be_writable(tst, "ioctl(RTC_RD_TIME/ALM_READ)", arg3,
-                                sizeof(struct rtc_time));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror) && res == 0)
-                  make_readable(arg3, sizeof(struct rtc_time));
-               break;
-            case RTC_ALM_SET:
-               must_be_readable(tst, "ioctl(RTC_ALM_SET)", arg3,
-                                sizeof(struct rtc_time));
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-            case RTC_IRQP_READ:
-               must_be_writable(tst, "ioctl(RTC_IRQP_READ)", arg3,
-                                sizeof(unsigned long));
-               KERNEL_DO_SYSCALL(tid,res);
-               if(!VG_(is_kerror) && res == 0)
-                   make_readable(arg3, sizeof(unsigned long));
-               break;
-#           endif /* GLIBC_2_1 */
-
-#           ifdef BLKGETSIZE
-            case BLKGETSIZE:
-               must_be_writable(tst, "ioctl(BLKGETSIZE)", arg3,
-                                sizeof(unsigned long));
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res == 0)
-                  make_readable (arg3, sizeof(unsigned long));
-               break;
-#           endif /* BLKGETSIZE */
-
-            /* CD ROM stuff (??)  */
-            case CDROMSUBCHNL:
-                must_be_readable(tst, "ioctl(CDROMSUBCHNL (cdsc_format, char))",
-                   (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
-                   sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
-                must_be_writable(tst, "ioctl(CDROMSUBCHNL)", arg3, 
-                   sizeof(struct cdrom_subchnl));
-                KERNEL_DO_SYSCALL(tid,res);
-                if (!VG_(is_kerror)(res) && res == 0)
-                   make_readable (arg3, sizeof(struct cdrom_subchnl));
-                break;
-            case CDROMREADTOCHDR:
-                must_be_writable(tst, "ioctl(CDROMREADTOCHDR)", arg3, 
-                   sizeof(struct cdrom_tochdr));
-                KERNEL_DO_SYSCALL(tid,res);
-                if (!VG_(is_kerror)(res) && res == 0)
-                   make_readable (arg3, sizeof(struct cdrom_tochdr));
-                break;
-            case CDROMREADTOCENTRY:
-                 must_be_readable(tst, "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
-                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
-                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
-                 must_be_readable(tst, "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
-                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
-                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
-                 must_be_writable(tst, "ioctl(CDROMREADTOCENTRY)", arg3, 
-                    sizeof(struct cdrom_tocentry));
-                 KERNEL_DO_SYSCALL(tid,res);
-                 if (!VG_(is_kerror)(res) && res == 0)
-                    make_readable (arg3, sizeof(struct cdrom_tochdr));
-                 break;
-            case CDROMPLAYMSF:
-                 must_be_readable(tst, "ioctl(CDROMPLAYMSF)", arg3, 
-                    sizeof(struct cdrom_msf));
-                 KERNEL_DO_SYSCALL(tid,res);
-                 break;
-            /* We don't have any specific information on it, so
-               try to do something reasonable based on direction and
-               size bits.  The encoding scheme is described in
-               /usr/include/asm/ioctl.h.  
-
-               According to Simon Hausmann, _IOC_READ means the kernel
-               writes a value to the ioctl value passed from the user
-               space and the other way around with _IOC_WRITE. */
-            default: {
-               UInt dir  = _IOC_DIR(arg2);
-               UInt size = _IOC_SIZE(arg2);
-               if (/* size == 0 || */ dir == _IOC_NONE) {
-                  VG_(message)(Vg_UserMsg, 
-                     "Warning: noted but unhandled ioctl 0x%x"
-                     " with no size/direction hints",
-                     arg2); 
-                  VG_(message)(Vg_UserMsg, 
-                     "   This could cause spurious value errors"
-                     " to appear.");
-                  VG_(message)(Vg_UserMsg, 
-                     "   See README_MISSING_SYSCALL_OR_IOCTL for guidance on"
-                     " writing a proper wrapper." );
-               } else {
-                  if ((dir & _IOC_WRITE) && size > 0)
-                     must_be_readable(tst, "ioctl(generic)", arg3, size);
-                  if ((dir & _IOC_READ) && size > 0)
-                     must_be_writable(tst, "ioctl(generic)", arg3, size);
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (size > 0 && (dir & _IOC_READ)
-                   && !VG_(is_kerror)(res) && res == 0
-                   && arg3 != (Addr)NULL)
-                  make_readable (arg3, size);
-               break;
-            }
-         }
-         break;
-
-      case __NR_kill: /* syscall 37 */
-         /* int kill(pid_t pid, int sig); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("kill ( %d, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_link: /* syscall 9 */
-         /* int link(const char *oldpath, const char *newpath); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("link ( %p, %p)\n", arg1, arg2);
-         must_be_readable_asciiz( tst, "link(oldpath)", arg1);
-         must_be_readable_asciiz( tst, "link(newpath)", arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_lseek: /* syscall 19 */
-         /* off_t lseek(int fildes, off_t offset, int whence); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR__llseek: /* syscall 140 */
-         /* int _llseek(unsigned int fd, unsigned long offset_high,       
-                        unsigned long  offset_low, 
-                        loff_t * result, unsigned int whence); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
-                        arg1,arg2,arg3,arg4,arg5);
-         must_be_writable( tst, "llseek(result)", arg4, sizeof(loff_t));
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0)
-            make_readable( arg4, sizeof(loff_t) );
-         break;
-
-      case __NR_lstat: /* syscall 107 */
-         /* int lstat(const char *file_name, struct stat *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("lstat ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "lstat(file_name)", arg1 );
-         must_be_writable( tst, "lstat(buf)", arg2, sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable( arg2, sizeof(struct stat) );
-         }
-         break;
-
-#     if defined(__NR_lstat64)
-      case __NR_lstat64: /* syscall 196 */
-         /* int lstat64(const char *file_name, struct stat64 *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("lstat64 ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "lstat64(file_name)", arg1 );
-         must_be_writable( tst, "lstat64(buf)", arg2, sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res == 0) {
-            make_readable( arg2, sizeof(struct stat64) );
-         }
-         break;
-#     endif
-
-      case __NR_mkdir: /* syscall 39 */
-         /* int mkdir(const char *pathname, mode_t mode); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mkdir ( %p, %d )\n", arg1,arg2);
-         must_be_readable_asciiz( tst, "mkdir(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_mmap2)
-      case __NR_mmap2: /* syscall 192 */
-         /* My impression is that this is exactly like __NR_mmap 
-            except that all 6 args are passed in regs, rather than in 
-            a memory-block. */
-         /* void* mmap(void *start, size_t length, int prot, 
-                       int flags, int fd, off_t offset); 
-         */
-         {
-         UInt arg6 = tst->m_ebp;
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
-                        arg1, arg2, arg3, arg4, arg5, arg6 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            approximate_mmap_permissions( (Addr)res, arg2, arg3 );
-         if (!VG_(is_kerror)(res)
-             && (arg3 & PROT_EXEC)) {
-            /* The client mmap'ed a segment with executable
-               permissions.  Tell the symbol-table loader, so that it
-               has an opportunity to pick up more symbols if this mmap
-               was caused by the client loading a new .so via
-               dlopen().  This is important for debugging KDE. */
-            VG_(read_symbols)();
-         }
-         }
-         break;
-#     endif
-
-      case __NR_mmap: /* syscall 90 */
-         /* void* mmap(void *start, size_t length, int prot, 
-                       int flags, int fd, off_t offset); 
-         */
-         {
-         Bool arg_block_readable
-                 = VG_(clo_instrument)
-                 ? VGM_(check_readable)(arg1, 6*sizeof(UInt), NULL)
-                 : True;
-         must_be_readable( tst, "mmap(args)", arg1, 6*sizeof(UInt) );
-         if (arg_block_readable) {
-            UInt* arg_block = (UInt*)arg1;
-            UInt arg6;
-            arg1 = arg_block[0];
-            arg2 = arg_block[1];
-            arg3 = arg_block[2];
-            arg4 = arg_block[3];
-            arg5 = arg_block[4];
-            arg6 = arg_block[5];
-            if (VG_(clo_trace_syscalls))
-               VG_(printf)("mmap ( %p, %d, %d, %d, %d, %d )\n",
-                           arg1, arg2, arg3, arg4, arg5, arg6 );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         if (arg_block_readable && !VG_(is_kerror)(res))
-            approximate_mmap_permissions( (Addr)res, arg2, arg3 );
-         if (arg_block_readable && !VG_(is_kerror)(res)
-             && (arg3 & PROT_EXEC)) {
-            /* The client mmap'ed a segment with executable
-               permissions.  Tell the symbol-table loader, so that it
-               has an opportunity to pick up more symbols if this mmap
-               was caused by the client loading a new .so via
-               dlopen().  This is important for debugging KDE. */
-            VG_(read_symbols)();
-         }
-         }
-         
-         break;
-
-      case __NR_mprotect: /* syscall 125 */
-         /* int mprotect(const void *addr, size_t len, int prot); */
-         /* should addr .. addr+len-1 be checked before the call? */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            approximate_mmap_permissions ( arg1, arg2, arg3 );
-         break;
-
-      case __NR_munmap: /* syscall 91 */
-         /* int munmap(void *start, size_t length); */
-         /* should start .. start+length-1 be checked before the call? */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("munmap ( %p, %d )\n", arg1,arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            /* Mash around start and length so that the area passed to
-               make_noaccess() exactly covers an integral number of
-               pages.  If we don't do that, our idea of addressible
-               memory diverges from that of the kernel's, which causes
-               the leak detector to crash. */
-            Bool munmap_exe;
-            Addr start = arg1;
-            Addr length = arg2;
-            while ((start % VKI_BYTES_PER_PAGE) > 0) { start--; length++; }
-            while (((start+length) % VKI_BYTES_PER_PAGE) > 0) { length++; }
-            /*
-            VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", 
-               arg1, arg2, start, length, (arg1!=start || arg2!=length) 
-                                             ? "CHANGE" : "");
-            */
-            make_noaccess( start, length );
-            /* Tell our symbol table machinery about this, so that if
-               this happens to be a .so being unloaded, the relevant
-               symbols are removed too. */
-            munmap_exe = VG_(symtab_notify_munmap) ( start, length );
-            if (munmap_exe)
-               VG_(invalidate_translations) ( start, length );
-         }
-         break;
-
-      case __NR_nanosleep: /* syscall 162 */
-         /* int nanosleep(const struct timespec *req, struct timespec *rem); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("nanosleep ( %p, %p )\n", arg1,arg2);
-         must_be_readable ( tst, "nanosleep(req)", arg1, 
-                                              sizeof(struct timespec) );
-         if (arg2 != (UInt)NULL)
-            must_be_writable ( tst, "nanosleep(rem)", arg2, 
-                               sizeof(struct timespec) );
-         KERNEL_DO_SYSCALL(tid,res);
-         /* Somewhat bogus ... is only written by the kernel if
-            res == -1 && errno == EINTR. */
-         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
-            make_readable ( arg2, sizeof(struct timespec) );
-         break;
-
-      case __NR__newselect: /* syscall 142 */
-         /* int select(int n,  
-                       fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
-                       struct timeval *timeout);
-         */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("newselect ( %d, %p, %p, %p, %p )\n",
-                        arg1,arg2,arg3,arg4,arg5);
-         if (arg2 != 0)
-            must_be_readable( tst, "newselect(readfds)",   
-                              arg2, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg3 != 0)
-            must_be_readable( tst, "newselect(writefds)",  
-                              arg3, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg4 != 0)
-            must_be_readable( tst, "newselect(exceptfds)", 
-                              arg4, arg1/8 /* __FD_SETSIZE/8 */ );
-         if (arg5 != 0)
-            must_be_readable( tst, "newselect(timeout)", arg5, 
-                              sizeof(struct timeval) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-         
-      case __NR_open: /* syscall 5 */
-         /* int open(const char *pathname, int flags); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
-         must_be_readable_asciiz( tst, "open(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("%d\n",res);
-         break;
-
-      case __NR_pipe: /* syscall 42 */
-         /* int pipe(int filedes[2]); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("pipe ( %p ) ...\n", arg1);
-         must_be_writable( tst, "pipe(filedes)", arg1, 2*sizeof(int) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable ( arg1, 2*sizeof(int) );
-         if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res))
-            VG_(printf)("SYSCALL[%d]       pipe --> (rd %d, wr %d)\n", 
-                        VG_(getpid)(), 
-                        ((UInt*)arg1)[0], ((UInt*)arg1)[1] );
-         break;
-
-      case __NR_poll: /* syscall 168 */
-         /* struct pollfd {
-               int fd;           -- file descriptor
-               short events;     -- requested events
-               short revents;    -- returned events
-            };
-           int poll(struct pollfd *ufds, unsigned int nfds, 
-                                         int timeout) 
-         */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
-         /* In fact some parts of this struct should be readable too.
-            This should be fixed properly. */
-         must_be_writable( tst, "poll(ufds)", 
-                           arg1, arg2 * sizeof(struct pollfd) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            Int i;
-            struct pollfd * arr = (struct pollfd *)arg1;
-            for (i = 0; i < arg2; i++)
-               make_readable( (Addr)(&arr[i].revents), sizeof(Short) );
-         }
-         break;
- 
-      case __NR_readlink: /* syscall 85 */
-         /* int readlink(const char *path, char *buf, size_t bufsiz); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
-         must_be_readable_asciiz( tst, "readlink(path)", arg1 );
-         must_be_writable ( tst, "readlink(buf)", arg2,arg3 );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            make_readable ( arg2, res );
-         }
-         break;
-
-      case __NR_readv: { /* syscall 145 */
-         /* int readv(int fd, const struct iovec * vector, size_t count); */
-         UInt i;
-         struct iovec * vec;
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
-         must_be_readable( tst, "readv(vector)", 
-                           arg2, arg3 * sizeof(struct iovec) );
-         /* ToDo: don't do any of the following if the vector is invalid */
-         vec = (struct iovec *)arg2;
-         for (i = 0; i < arg3; i++)
-            must_be_writable( tst, "readv(vector[...])",
-                              (UInt)vec[i].iov_base,vec[i].iov_len );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && res > 0) {
-            /* res holds the number of bytes read. */
-            for (i = 0; i < arg3; i++) {
-               Int nReadThisBuf = vec[i].iov_len;
-               if (nReadThisBuf > res) nReadThisBuf = res;
-               make_readable( (UInt)vec[i].iov_base, nReadThisBuf );
-               res -= nReadThisBuf;
-               if (res < 0) VG_(panic)("vg_wrap_syscall: readv: res < 0");
-            }
-         }
-         break;
-      }
-
-      case __NR_rename: /* syscall 38 */
-         /* int rename(const char *oldpath, const char *newpath); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("rename ( %p, %p )\n", arg1, arg2 );
-         must_be_readable_asciiz( tst, "rename(oldpath)", arg1 );
-         must_be_readable_asciiz( tst, "rename(newpath)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_rmdir: /* syscall 40 */
-         /* int rmdir(const char *pathname); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("rmdir ( %p )\n", arg1);
-         must_be_readable_asciiz( tst, "rmdir(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_sched_setparam: /* syscall 154 */
-         /* int sched_setparam(pid_t pid, const struct sched_param *p); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_setparam ( %d, %p )\n", arg1, arg2 );
-         must_be_readable( tst, "sched_setparam(ptr)",
-                           arg2, sizeof(struct sched_param) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct sched_param) );
-         break;
-
-      case __NR_sched_getparam: /* syscall 155 */
-         /* int sched_getparam(pid_t pid, struct sched_param *p); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_getparam ( %d, %p )\n", arg1, arg2 );
-         must_be_writable( tst, "sched_getparam(ptr)",
-                           arg2, sizeof(struct sched_param) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct sched_param) );
-         break;
-
-      case __NR_sched_yield: /* syscall 158 */
-         /* int sched_yield(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sched_yield ()\n" );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_select: /* syscall 82 */
-         /* struct sel_arg_struct {
-              unsigned long n;
-              fd_set *inp, *outp, *exp;
-              struct timeval *tvp;
-            };
-            int old_select(struct sel_arg_struct *arg);
-         */
-         {
-         Bool arg_block_readable
-                 = VG_(clo_instrument)
-                 ? VGM_(check_readable)(arg1, 5*sizeof(UInt), NULL)
-                 : True;
-         must_be_readable ( tst, "select(args)", arg1, 5*sizeof(UInt) );
-         if (arg_block_readable) {
-            UInt* arg_struct = (UInt*)arg1;
-            arg1 = arg_struct[0];
-            arg2 = arg_struct[1];
-            arg3 = arg_struct[2];
-            arg4 = arg_struct[3];
-            arg5 = arg_struct[4];
-
-            if (VG_(clo_trace_syscalls)) 
-               VG_(printf)("select ( %d, %p, %p, %p, %p )\n", 
-                           arg1,arg2,arg3,arg4,arg5);
-            if (arg2 != (Addr)NULL)
-               must_be_readable(tst, "select(readfds)", arg2, 
-                                arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg3 != (Addr)NULL)
-               must_be_readable(tst, "select(writefds)", arg3, 
-                                arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg4 != (Addr)NULL)
-               must_be_readable(tst, "select(exceptfds)", arg4, 
-                                arg1/8 /* __FD_SETSIZE/8 */ );
-            if (arg5 != (Addr)NULL)
-               must_be_readable(tst, "select(timeout)", arg5, 
-                                sizeof(struct timeval) );
-         }
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setitimer: /* syscall 104 */
-         /* setitimer(int which, const struct itimerval *value,
-                                 struct itimerval *ovalue); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
-         if (arg2 != (Addr)NULL)
-            must_be_readable(tst, "setitimer(value)", 
-                             arg2, sizeof(struct itimerval) );
-         if (arg3 != (Addr)NULL)
-            must_be_writable(tst, "setitimer(ovalue)", 
-                             arg3, sizeof(struct itimerval));
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) {
-            make_readable(arg3, sizeof(struct itimerval));
-         }
-         break;
-
-#     if defined(__NR_setfsgid32)
-      case __NR_setfsgid32: /* syscall 216 */
-         /* int setfsgid(uid_t fsgid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setfsgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setgid32)
-      case __NR_setgid32: /* syscall 214 */
-#     endif
-      case __NR_setgid: /* syscall 46 */
-         /* int setgid(gid_t gid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setgid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setsid: /* syscall 66 */
-         /* pid_t setsid(void); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setsid ()\n");
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setgroups32)
-      case __NR_setgroups32: /* syscall 206 */
-#     endif
-      case __NR_setgroups: /* syscall 81 */
-         /* int setgroups(size_t size, const gid_t *list); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setgroups ( %d, %p )\n", arg1, arg2);
-         if (arg1 > 0)
-            must_be_readable ( tst, "setgroups(list)", arg2, 
-                               arg1 * sizeof(gid_t) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setpgid: /* syscall 57 */
-         /* int setpgid(pid_t pid, pid_t pgid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setpgid ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setregid32)
-      case __NR_setregid32: /* syscall 204 */
-         /* int setregid(gid_t rgid, gid_t egid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setregid32(?) ( %d, %d )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setresuid32)
-      case __NR_setresuid32: /* syscall 208 */
-         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-#     endif
-
-#     if defined(__NR_setreuid32)
-      case __NR_setreuid32: /* syscall 203 */
-#     endif
-      case __NR_setreuid: /* syscall 70 */
-         /* int setreuid(uid_t ruid, uid_t euid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_setrlimit: /* syscall 75 */
-         /* int setrlimit (int resource, const struct rlimit *rlim); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setrlimit ( %d, %p )\n", arg1,arg2);
-         must_be_readable( tst, "setrlimit(rlim)", arg2, sizeof(struct rlimit) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-#     if defined(__NR_setuid32)
-      case __NR_setuid32: /* syscall 213 */
-#     endif
-      case __NR_setuid: /* syscall 23 */
-         /* int setuid(uid_t uid); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("setuid ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_socketcall: /* syscall 102 */
-         /* int socketcall(int call, unsigned long *args); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("socketcall ( %d, %p )\n",arg1,arg2);
-         switch (arg1 /* request */) {
-
-            case SYS_SOCKETPAIR:
-               /* int socketpair(int d, int type, int protocol, int sv[2]); */
-               must_be_readable( tst, "socketcall.socketpair(args)", 
-                                 arg2, 4*sizeof(Addr) );
-               must_be_writable( tst, "socketcall.socketpair(sv)", 
-                                 ((UInt*)arg2)[3], 2*sizeof(int) );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res))
-                  make_readable ( ((UInt*)arg2)[3], 2*sizeof(int) );
-               break;
-
-            case SYS_SOCKET:
-               /* int socket(int domain, int type, int protocol); */
-               must_be_readable( tst, "socketcall.socket(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_BIND:
-               /* int bind(int sockfd, struct sockaddr *my_addr, 
-                           int addrlen); */
-               must_be_readable( tst, "socketcall.bind(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               must_be_readable_sockaddr( tst, "socketcall.bind(my_addr.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-               
-            case SYS_LISTEN:
-               /* int listen(int s, int backlog); */
-               must_be_readable( tst, "socketcall.listen(args)", 
-                                 arg2, 2*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_ACCEPT: {
-               /* int accept(int s, struct sockaddr *addr, int *p_addrlen); */
-               Addr addr;
-               Addr p_addrlen;
-               UInt addrlen_in, addrlen_out;
-               must_be_readable( tst, "socketcall.accept(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               addr      = ((UInt*)arg2)[1];
-               p_addrlen = ((UInt*)arg2)[2];
-               if (p_addrlen != (Addr)NULL) {
-                  must_be_readable ( tst, "socketcall.accept(addrlen)", 
-                                     p_addrlen, sizeof(int) );
-                  addrlen_in = safe_dereference( p_addrlen, 0 );
-                  must_be_writable ( tst, "socketcall.accept(addr)", 
-                                     addr, addrlen_in );
-               }
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res >= 0 && p_addrlen != (Addr)NULL) {
-                  addrlen_out = safe_dereference( p_addrlen, 0 );
-                  if (addrlen_out > 0)
-                     make_readable( addr, addrlen_out );
-               }
-               break;
-            }
-
-            case SYS_SENDTO:
-               /* int sendto(int s, const void *msg, int len, 
-                             unsigned int flags, 
-                             const struct sockaddr *to, int tolen); */
-               must_be_readable( tst, "socketcall.sendto(args)", arg2, 
-                                 6*sizeof(Addr) );
-               must_be_readable( tst, "socketcall.sendto(msg)",
-                                 ((UInt*)arg2)[1], /* msg */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               must_be_readable_sockaddr( tst, "socketcall.sendto(to.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SEND:
-               /* int send(int s, const void *msg, size_t len, int flags); */
-               must_be_readable( tst, "socketcall.send(args)", arg2,
-                                 4*sizeof(Addr) );
-               must_be_readable( tst, "socketcall.send(msg)",
-                                 ((UInt*)arg2)[1], /* msg */
-                                  ((UInt*)arg2)[2]  /* len */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_RECVFROM:
-               /* int recvfrom(int s, void *buf, int len, unsigned int flags,
-                               struct sockaddr *from, int *fromlen); */
-               must_be_readable( tst, "socketcall.recvfrom(args)", 
-                                 arg2, 6*sizeof(Addr) );
-               if ( ((UInt*)arg2)[4] /* from */ != 0) {
-                  must_be_readable( tst, "socketcall.recvfrom(fromlen)",
-                                    ((UInt*)arg2)[5] /* fromlen */, 
-                                    sizeof(int) );
-                  must_be_writable( tst, "socketcall.recvfrom(from)",
-                                    ((UInt*)arg2)[4], /*from*/
-                                    safe_dereference( (Addr)
-                                                      ((UInt*)arg2)[5], 0 ) );
-               }
-               must_be_writable( tst, "socketcall.recvfrom(buf)", 
-                                 ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res >= 0) {
-                  make_readable( ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-                  if ( ((UInt*)arg2)[4] /* from */ != 0) {
-                     make_readable( 
-                        ((UInt*)arg2)[4], /*from*/
-                        safe_dereference( (Addr) ((UInt*)arg2)[5], 0 ) );
-                  }
-               }
-               /* phew! */
-               break;
-
-            case SYS_RECV:
-               /* int recv(int s, void *buf, int len, unsigned int flags); */
-               /* man 2 recv says:
-               The  recv call is normally used only on a connected socket
-               (see connect(2)) and is identical to recvfrom with a  NULL
-               from parameter.
-               */
-               must_be_readable( tst, "socketcall.recv(args)", 
-                                 arg2, 4*sizeof(Addr) );
-               must_be_writable( tst, "socketcall.recv(buf)", 
-                                 ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res) && res >= 0 
-                                   && ((UInt*)arg2)[1] != (UInt)NULL) {
-                  make_readable( ((UInt*)arg2)[1], /* buf */
-                                 ((UInt*)arg2)[2]  /* len */ );
-               }
-               break;
-
-            case SYS_CONNECT:
-               /* int connect(int sockfd, 
-                              struct sockaddr *serv_addr, int addrlen ); */
-               must_be_readable( tst, "socketcall.connect(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               must_be_readable( tst, "socketcall.connect(serv_addr.sa_family)",
-                                 ((UInt*)arg2)[1], /* serv_addr */
-                                 sizeof (sa_family_t));
-               must_be_readable_sockaddr( tst,
-                  "socketcall.connect(serv_addr.%s)",
-                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SETSOCKOPT:
-               /* int setsockopt(int s, int level, int optname, 
-                                 const void *optval, int optlen); */
-               must_be_readable( tst, "socketcall.setsockopt(args)", 
-                                 arg2, 5*sizeof(Addr) );
-               must_be_readable( tst, "socketcall.setsockopt(optval)",
-                                 ((UInt*)arg2)[3], /* optval */
-                                 ((UInt*)arg2)[4]  /* optlen */ );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_GETSOCKOPT:
-               /* int setsockopt(int s, int level, int optname, 
-                                 void *optval, socklen_t *optlen); */
-               must_be_readable( tst, "socketcall.getsockopt(args)", 
-                                 arg2, 5*sizeof(Addr) );
-               {
-               Addr optval_p = ((UInt*)arg2)[3];
-               Addr optlen_p = ((UInt*)arg2)[4];
-               /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
-               UInt optlen_after;
-               UInt optlen = safe_dereference ( optlen_p, 0 );
-               if (optlen > 0) 
-                  must_be_writable( tst, "socketcall.getsockopt(optval)", 
-                                    optval_p, optlen );
-               KERNEL_DO_SYSCALL(tid,res);
-               optlen_after = safe_dereference ( optlen_p, 0 );
-               if (!VG_(is_kerror)(res) && optlen > 0 && optlen_after > 0) 
-                  make_readable( optval_p, optlen_after );
-               }
-               break;
-
-            case SYS_GETSOCKNAME:
-               /* int getsockname(int s, struct sockaddr* name, 
-                                  int* namelen) */
-               must_be_readable( tst, "socketcall.getsockname(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               {
-               UInt namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0);
-               if (namelen > 0)
-                  must_be_writable( tst, "socketcall.getsockname(name)", 
-                                    ((UInt*)arg2)[1], namelen );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res)) {
-                  namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0);
-                  if (namelen > 0 
-                      && ((UInt*)arg2)[1] != (UInt)NULL)
-                     make_readable( ((UInt*)arg2)[1], namelen );
-               }
-               }
-               break;
-
-            case SYS_GETPEERNAME:
-               /* int getpeername(int s, struct sockaddr* name, 
-                                  int* namelen) */
-               must_be_readable( tst, "socketcall.getpeername(args)", 
-                                 arg2, 3*sizeof(Addr) );
-               {
-               UInt namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0);
-               if (namelen > 0)
-                  must_be_writable( tst, "socketcall.getpeername(name)", 
-                                    ((UInt*)arg2)[1], namelen );
-               KERNEL_DO_SYSCALL(tid,res);
-               if (!VG_(is_kerror)(res)) {
-                  namelen = safe_dereference( (Addr) ((UInt*)arg2)[2], 0);
-                  if (namelen > 0 
-                      && ((UInt*)arg2)[1] != (UInt)NULL)
-                     make_readable( ((UInt*)arg2)[1], namelen );
-               }
-               }
-               break;
-
-            case SYS_SHUTDOWN:
-               /* int shutdown(int s, int how); */
-               must_be_readable( tst, "socketcall.shutdown(args)", 
-                                 arg2, 2*sizeof(Addr) );
-               KERNEL_DO_SYSCALL(tid,res);
-               break;
-
-            case SYS_SENDMSG:
-               {
-                  /* int sendmsg(int s, const struct msghdr *msg, int flags); */
-
-                  /* this causes warnings, and I don't get why. glibc bug?
-                   * (after all it's glibc providing the arguments array)
-                  must_be_readable( "socketcall.sendmsg(args)", 
-                                     arg2, 3*sizeof(Addr) );
-                  */
-
-                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
-                  msghdr_foreachfield ( tst, msg, must_be_readable_sendmsg );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-                  break;
-               }
-
-            case SYS_RECVMSG:
-               {
-                  /* int recvmsg(int s, struct msghdr *msg, int flags); */
-
-                  /* this causes warnings, and I don't get why. glibc bug?
-                   * (after all it's glibc providing the arguments array)
-                  must_be_readable( "socketcall.recvmsg(args)", 
-                                     arg2, 3*sizeof(Addr) );
-                  */
-
-                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
-                  msghdr_foreachfield ( tst, msg, must_be_writable_recvmsg );
-
-                  KERNEL_DO_SYSCALL(tid,res);
-
-                  if ( !VG_(is_kerror)( res ) )
-                     msghdr_foreachfield( tst, msg, make_readable_recvmsg );
-
-                  break;
-               }
-
-            default:
-               VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
-               VG_(panic)("... bye!\n");
-               break; /*NOTREACHED*/
-         }
-         break;
-
-      case __NR_stat: /* syscall 106 */
-         /* int stat(const char *file_name, struct stat *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("stat ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "stat(file_name)", arg1 );
-         must_be_writable( tst, "stat(buf)", arg2, sizeof(struct stat) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct stat) );
-         break;
-
-      case __NR_statfs: /* syscall 99 */
-         /* int statfs(const char *path, struct statfs *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("statfs ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "statfs(path)", arg1 );
-         must_be_writable( tst, "stat(buf)", arg2, sizeof(struct statfs) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct statfs) );
-         break;
-
-      case __NR_symlink: /* syscall 83 */
-         /* int symlink(const char *oldpath, const char *newpath); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("symlink ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "symlink(oldpath)", arg1 );
-         must_be_readable_asciiz( tst, "symlink(newpath)", arg2 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break; 
-
-#     if defined(__NR_stat64)
-      case __NR_stat64: /* syscall 195 */
-         /* int stat64(const char *file_name, struct stat64 *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("stat64 ( %p, %p )\n",arg1,arg2);
-         must_be_readable_asciiz( tst, "stat64(file_name)", arg1 );
-         must_be_writable( tst, "stat64(buf)", arg2, sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct stat64) );
-         break;
-#     endif
-
-#     if defined(__NR_fstat64)
-      case __NR_fstat64: /* syscall 197 */
-         /* int fstat64(int filedes, struct stat64 *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("fstat64 ( %d, %p )\n",arg1,arg2);
-         must_be_writable( tst, "fstat64(buf)", arg2, sizeof(struct stat64) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg2, sizeof(struct stat64) );
-         break;
-#     endif
-
-      case __NR_sysinfo: /* syscall 116 */
-         /* int sysinfo(struct sysinfo *info); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("sysinfo ( %p )\n",arg1);
-         must_be_writable( tst, "sysinfo(info)", arg1, sizeof(struct sysinfo) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res))
-            make_readable( arg1, sizeof(struct sysinfo) );
-         break;
-
-      case __NR_time: /* syscall 13 */
-         /* time_t time(time_t *t); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("time ( %p )\n",arg1);
-         if (arg1 != (UInt)NULL) {
-            must_be_writable( tst, "time", arg1, sizeof(time_t) );
-         }
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            make_readable( arg1, sizeof(time_t) );
-         }
-         break;
-
-      case __NR_times: /* syscall 43 */
-         /* clock_t times(struct tms *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("times ( %p )\n",arg1);
-         must_be_writable( tst, "times(buf)", arg1, sizeof(struct tms) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            make_readable( arg1, sizeof(struct tms) );
-         }
-         break;
-
-      case __NR_truncate: /* syscall 92 */
-         /* int truncate(const char *path, size_t length); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("truncate ( %p, %d )\n", arg1,arg2);
-         must_be_readable_asciiz( tst, "truncate(path)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_umask: /* syscall 60 */
-         /* mode_t umask(mode_t mask); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("umask ( %d )\n", arg1);
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_unlink: /* syscall 10 */
-         /* int unlink(const char *pathname) */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("ulink ( %p )\n",arg1);
-         must_be_readable_asciiz( tst, "unlink(pathname)", arg1 );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_uname: /* syscall 122 */
-         /* int uname(struct utsname *buf); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("uname ( %p )\n",arg1);
-         must_be_writable( tst, "uname(buf)", arg1, sizeof(struct utsname) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
-            make_readable( arg1, sizeof(struct utsname) );
-         }
-         break;
-
-      case __NR_utime: /* syscall 30 */
-         /* int utime(const char *filename, struct utimbuf *buf); */
-         if (VG_(clo_trace_syscalls)) 
-            VG_(printf)("utime ( %p, %p )\n", arg1,arg2);
-         must_be_readable_asciiz( tst, "utime(filename)", arg1 );
-         if (arg2 != (UInt)NULL)
-            must_be_readable( tst, "utime(buf)", arg2, 
-                                                 sizeof(struct utimbuf) );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-
-      case __NR_wait4: /* syscall 114 */
-         /* pid_t wait4(pid_t pid, int *status, int options,
-                        struct rusage *rusage) */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("wait4 ( %d, %p, %d, %p )\n",
-                      arg1,arg2,arg3,arg4);
-         if (arg2 != (Addr)NULL)
-            must_be_writable( tst, "wait4(status)", arg2, sizeof(int) );
-         if (arg4 != (Addr)NULL)
-            must_be_writable( tst, "wait4(rusage)", arg4, 
-                              sizeof(struct rusage) );
-         KERNEL_DO_SYSCALL(tid,res);
-         if (!VG_(is_kerror)(res)) {
-            if (arg2 != (Addr)NULL)
-               make_readable( arg2, sizeof(int) );
-            if (arg4 != (Addr)NULL)
-               make_readable( arg4, sizeof(struct rusage) );
-         }
-         break;
-
-      case __NR_writev: { /* syscall 146 */
-         /* int writev(int fd, const struct iovec * vector, size_t count); */
-         UInt i;
-         struct iovec * vec;
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
-         must_be_readable( tst, "writev(vector)", 
-                           arg2, arg3 * sizeof(struct iovec) );
-         /* ToDo: don't do any of the following if the vector is invalid */
-         vec = (struct iovec *)arg2;
-         for (i = 0; i < arg3; i++)
-            must_be_readable( tst, "writev(vector[...])",
-                              (UInt)vec[i].iov_base,vec[i].iov_len );
-         KERNEL_DO_SYSCALL(tid,res);
-         break;
-      }
-
-      /*-------------------------- SIGNALS --------------------------*/
-
-      /* Normally set to 1, so that Valgrind's signal-simulation machinery
-         is engaged.  Sometimes useful to disable (set to 0), for
-         debugging purposes, to make clients more deterministic. */
-#     define SIGNAL_SIMULATION 1
-
-      case __NR_sigaltstack: /* syscall 186 */
-         /* int sigaltstack(const stack_t *ss, stack_t *oss); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sigaltstack ( %p, %p )\n",arg1,arg2);
-         if (arg1 != (UInt)NULL) {
-            must_be_readable( tst, "sigaltstack(ss)", 
-                              arg1, sizeof(vki_kstack_t) );
-         }
-         if (arg2 != (UInt)NULL) {
-            must_be_writable( tst, "sigaltstack(ss)", 
-                              arg1, sizeof(vki_kstack_t) );
-         }
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigaltstack) (tid);
-         res = tst->m_eax;
-#        else
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg2 != (UInt)NULL)
-            make_readable( arg2, sizeof(vki_kstack_t));
-         break;
-
-      case __NR_rt_sigaction:
-      case __NR_sigaction:
-         /* int sigaction(int signum, struct k_sigaction *act, 
-                                      struct k_sigaction *oldact); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
-         if (arg2 != (UInt)NULL)
-            must_be_readable( tst, "sigaction(act)", 
-                              arg2, sizeof(vki_ksigaction));
-         if (arg3 != (UInt)NULL)
-            must_be_writable( tst, "sigaction(oldact)", 
-                              arg3, sizeof(vki_ksigaction));
-         /* We do this one ourselves! */
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigaction)(tid);
-         res = tst->m_eax;
-#        else
-         /* debugging signals; when we don't handle them. */
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
-            make_readable( arg3, sizeof(vki_ksigaction));
-         break;
-
-      case __NR_rt_sigprocmask:
-      case __NR_sigprocmask:
-         /* int sigprocmask(int how, k_sigset_t *set, 
-                                     k_sigset_t *oldset); */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
-         if (arg2 != (UInt)NULL)
-            must_be_readable( tst, "sigprocmask(set)", 
-                              arg2, sizeof(vki_ksigset_t));
-         if (arg3 != (UInt)NULL)
-            must_be_writable( tst, "sigprocmask(oldset)", 
-                              arg3, sizeof(vki_ksigset_t));
-#        if SIGNAL_SIMULATION
-         VG_(do__NR_sigprocmask) ( tid, 
-                                   arg1 /*how*/, 
-                                   (vki_ksigset_t*) arg2,
-                                   (vki_ksigset_t*) arg3 );
-         res = tst->m_eax;
-#        else
-         KERNEL_DO_SYSCALL(tid,res);
-#        endif
-         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
-            make_readable( arg3, sizeof(vki_ksigset_t));
-         break;
-
-      case __NR_sigpending: /* syscall 73 */
-#     if defined(__NR_rt_sigpending)
-      case __NR_rt_sigpending: /* syscall 176 */
-#     endif
-         /* int sigpending( sigset_t *set ) ; */
-         if (VG_(clo_trace_syscalls))
-            VG_(printf)( "sigpending ( %p )\n", arg1 );
-         must_be_writable( tst, "sigpending(set)", 
-                           arg1, sizeof(vki_ksigset_t));
-#        if SIGNAL_SIMULATION
-         VG_(do_sigpending)( tid, (vki_ksigset_t*)arg1 );
-         res = 0;
-	 SET_EAX(tid, res);
-#        else
-         KERNEL_DO_SYSCALL(tid, res);
-#        endif
-         if ( !VG_( is_kerror )( res ) && res == 0 )
-            make_readable( arg1, sizeof( vki_ksigset_t ) ) ;
-         break ;
-
-      default:
-         VG_(message)
-            (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno);
-         VG_(message)
-            (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
-         VG_(message)
-            (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
-         VG_(unimplemented)("no wrapper for the above system call");
-         vg_assert(3+3 == 7);
-         break; /*NOTREACHED*/
-   }
-
-   /* { void zzzmemscan(void); zzzmemscan(); } */
-
-   if (! VG_(first_and_last_secondaries_look_plausible)())
-      sane_before_call = False;
-
-   if (sane_before_call && (!sane_after_call)) {
-      VG_(message)(Vg_DebugMsg, "perform_assumed_nonblocking_syscall: ");
-      VG_(message)(Vg_DebugMsg,
-                   "probable sanity check failure for syscall number %d\n", 
-                   syscallno );
-      VG_(panic)("aborting due to the above ... bye!"); 
-   }
-
-   VGP_POPCC;
-}
-
-
-
-/* Perform pre- and post- actions for a blocking syscall, but do not
-   do the syscall itself.  If res is NULL, the pre-syscall actions are
-   to be performed.  If res is non-NULL, the post-syscall actions are
-   to be performed, and *res is assumed to hold the result of the
-   syscall.  This slightly strange scheme makes it impossible to
-   mistakenly use the value of *res in the pre-syscall actions.  
-
-   This doesn't actually do the syscall itself, it is important to
-   observe.  
-
-   Because %eax is used both for the syscall number before the call
-   and the result value afterwards, we can't reliably use it to get
-   the syscall number.  So the caller has to pass it explicitly.  
-*/
-void VG_(check_known_blocking_syscall) ( ThreadId tid,
-                                         Int syscallno,
-                                         Int* /*IN*/ res )
-{
-   ThreadState* tst;
-   Bool         sane_before_post, sane_after_post;
-   UInt         arg1, arg2, arg3;
-
-   VGP_PUSHCC(VgpSyscall);
-
-   vg_assert(VG_(is_valid_tid)(tid));
-   sane_before_post = True;
-   sane_after_post  = True;
-   tst              = & VG_(threads)[tid];
-   arg1             = tst->m_ebx;
-   arg2             = tst->m_ecx;
-   arg3             = tst->m_edx;
-   /*
-   arg4             = tst->m_esi;
-   arg5             = tst->m_edi;
-   */
-
-   if (res != NULL
-       && ! VG_(first_and_last_secondaries_look_plausible)())
-      sane_before_post = False;
-
-   switch (syscallno) {
-
-      case __NR_read: /* syscall 3 */
-         /* size_t read(int fd, void *buf, size_t count); */
-         if (res == NULL) { 
-            /* PRE */
-            if (VG_(clo_trace_syscalls))
-               VG_(printf)(
-                  "SYSCALL--PRE[%d,%d]       read ( %d, %p, %d )\n", 
-                  VG_(getpid)(), tid,
-                  arg1, arg2, arg3);
-            must_be_writable( tst, "read(buf)", arg2, arg3 );
-         } else {
-            /* POST */
-            if (VG_(clo_trace_syscalls))
-               VG_(printf)(
-                  "SYSCALL-POST[%d,%d]       read ( %d, %p, %d ) --> %d\n", 
-                  VG_(getpid)(), tid,
-                  arg1, arg2, arg3, *res);
-            if (!VG_(is_kerror)(*res) && *res > 0) {
-               make_readable( arg2, *res );
-            }
-	 }
-         break;
-
-      case __NR_write: /* syscall 4 */
-         /* size_t write(int fd, const void *buf, size_t count); */
-         if (res == NULL) {
-            /* PRE */
-            if (VG_(clo_trace_syscalls))
-               VG_(printf)(
-                  "SYSCALL--PRE[%d,%d]       write ( %d, %p, %d )\n", 
-                  VG_(getpid)(), tid,
-                  arg1, arg2, arg3);
-            must_be_readable( tst, "write(buf)", arg2, arg3 );
-	 } else {
-            /* POST */
-            if (VG_(clo_trace_syscalls))
-               VG_(printf)(
-                  "SYSCALL-POST[%d,%d]       write ( %d, %p, %d ) --> %d\n", 
-                  VG_(getpid)(), tid,
-                  arg1, arg2, arg3, *res);
-	 }
-         break;
-
-      default:
-         VG_(printf)("check_known_blocking_syscall: unexpected %d\n", 
-                     syscallno);
-         VG_(panic)("check_known_blocking_syscall");
-         /*NOTREACHED*/
-         break;
-   }
-
-   if (res != NULL) { /* only check after syscall */
-      if (! VG_(first_and_last_secondaries_look_plausible)())
-         sane_after_post = False;
-
-      if (sane_before_post && (!sane_after_post)) {
-         VG_(message)(Vg_DebugMsg, "perform_known_blocking_syscall: ");
-         VG_(message)(Vg_DebugMsg,
-                      "probable sanity check failure for syscall number %d\n", 
-                      syscallno );
-         VG_(panic)("aborting due to the above ... bye!"); 
-      }
-   }
-
-   VGP_POPCC;
-}
-
-
-/*--------------------------------------------------------------------*/
-/*--- end                                         vg_syscall_mem.c ---*/
-/*--------------------------------------------------------------------*/
diff --git a/vg_syscalls.c b/vg_syscalls.c
new file mode 100644
index 0000000..a500deb
--- /dev/null
+++ b/vg_syscalls.c
@@ -0,0 +1,3164 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Update the byte permission maps following a system call.     ---*/
+/*---                                                vg_syscalls.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, an x86 protected-mode emulator 
+   designed for debugging and profiling binaries on x86-Unixes.
+
+   Copyright (C) 2000-2002 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "vg_include.h"
+
+/* vg_unsafe.h should NOT be included into any file except this
+   one. */
+#include "vg_unsafe.h"
+
+
+/* All system calls are channelled through here, doing two things:
+
+   * notify the skin of the memory events (reads, writes) happening
+
+   * perform the syscall, usually by passing it along to the kernel
+     unmodified.  However, because we simulate signals ourselves,
+     signal-related syscalls are routed to vg_signal.c, and are not
+     delivered to the kernel.
+
+   A magical piece of assembly code, vg_do_syscall(), in vg_syscall.S
+   does the tricky bit of passing a syscall to the kernel, whilst
+   having the simulator retain control.
+*/
+
+#define SYSCALL_TRACK(fn, args...)  VG_TRACK(fn, Vg_CoreSysCall, ## args)
+
+#define MAYBE_PRINTF(format, args...)  \
+   if (VG_(clo_trace_syscalls))        \
+      VG_(printf)(format, ## args)
+
+/* ---------------------------------------------------------------------
+   Doing mmap, munmap, mremap, mprotect
+   ------------------------------------------------------------------ */
+
+// Nb: this isn't done as precisely as possible, but it seems that programs
+// are usually sufficiently well-behaved that the more obscure corner cases
+// aren't important.  Various comments in the few functions below give more
+// details... njn 2002-Sep-17
+
+/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
+   munmap, mprotect (and mremap??) work at the page level.  So addresses
+   and lengths must be adjusted for this. */
+
+/* Mash around start and length so that the area exactly covers
+   an integral number of pages.  If we don't do that, memcheck's
+   idea of addressible memory diverges from that of the
+   kernel's, which causes the leak detector to crash. */
+static 
+void mash_addr_and_len( Addr* a, UInt* len)
+{
+   while (( *a         % VKI_BYTES_PER_PAGE) > 0) { (*a)--; (*len)++; }
+   while (((*a + *len) % VKI_BYTES_PER_PAGE) > 0) {         (*len)++; }
+}
+
+static
+void mmap_segment ( Addr a, UInt len, UInt prot, Int fd )
+{
+   Bool nn, rr, ww, xx;
+
+   /* Records segment, reads debug symbols if necessary */
+   if (prot & PROT_EXEC && fd != -1)
+      VG_(new_exe_segment) ( a, len );
+
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   VG_TRACK( new_mem_mmap, a, len, nn, rr, ww, xx );
+}
+
+static
+void munmap_segment ( Addr a, UInt len )
+{
+   /* Addr orig_a   = a;
+      Addr orig_len = len; */
+
+   mash_addr_and_len(&a, &len);
+   /*
+   VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", 
+      orig_a, orig_len, a, len, (orig_a!=a || orig_len!=len) 
+                                    ? "CHANGE" : "");
+   */
+
+   /* Invalidate translations as necessary (also discarding any basic
+      block-specific info retained by the skin) and unload any debug
+      symbols. */
+   // This doesn't handle partial unmapping of exe segs correctly, if that
+   // ever happens...
+   VG_(remove_if_exe_segment) ( a, len );
+
+   VG_TRACK( die_mem_munmap, a, len );
+}
+
+static 
+void mprotect_segment ( Addr a, UInt len, Int prot )
+{
+   Bool nn, rr, ww, xx;
+   nn = prot & PROT_NONE;
+   rr = prot & PROT_READ;
+   ww = prot & PROT_WRITE;
+   xx = prot & PROT_EXEC;
+
+   // if removing exe permission, should check and remove from exe_seg list
+   // if adding, should check and add to exe_seg list
+   // easier to ignore both cases -- both v. unlikely?
+   mash_addr_and_len(&a, &len);
+   VG_TRACK( change_mem_mprotect, a, len, nn, rr, ww, xx );
+}
+
+static 
+void mremap_segment ( old_addr, old_size, new_addr, new_size )
+{
+   /* If the block moves, assume new and old blocks can't overlap; seems to
+    * be valid judging from Linux kernel code in mm/mremap.c */
+   vg_assert(old_addr == new_addr         ||
+             old_addr+old_size < new_addr ||
+             new_addr+new_size < old_addr);
+
+   if (new_size < old_size) {
+      // if exe_seg
+      //    unmap old symbols from old_addr+new_size..old_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, new_size );
+      VG_TRACK( die_mem_munmap, old_addr+new_size, old_size-new_size );
+
+   } else {
+      // if exe_seg
+      //    map new symbols from new_addr+old_size..new_addr+new_size
+      //    update exe_seg size = new_size
+      //    update exe_seg addr = new_addr...
+      VG_TRACK( copy_mem_remap, old_addr, new_addr, old_size );
+      // what should the permissions on the new extended part be??
+      // using 'rwx'
+      VG_TRACK( new_mem_mmap,   new_addr+old_size, new_size-old_size,
+                                False, True, True, True );
+   }
+}
+
+
+/* Is this a Linux kernel error return value? */
+/* From:
+   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
+   linux/i386/sysdep.h?
+   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc
+
+   \begin{quote}:
+
+   Linux uses a negative return value to indicate syscall errors,
+   unlike most Unices, which use the condition codes' carry flag.
+
+   Since version 2.1 the return value of a system call might be
+   negative even if the call succeeded.  E.g., the `lseek' system call
+   might return a large offset.  Therefore we must not anymore test
+   for < 0, but test for a real error by making sure the value in %eax
+   is a real error number.  Linus said he will make sure that no syscall
+   returns a value in -1 .. -4095 as a valid result so we can safely
+   test with -4095.  
+
+   END QUOTE
+*/
+Bool VG_(is_kerror) ( Int res )
+{
+   if (res >= -4095 && res <= -1)
+      return True;
+   else
+      return False;
+}
+
+static
+UInt get_shm_size ( Int shmid )
+{
+   struct shmid_ds buf;
+   long __res;
+    __asm__ volatile ( "int $0x80"
+                       : "=a" (__res)
+                       : "0" (__NR_ipc),
+                         "b" ((long)(24) /*IPCOP_shmctl*/),
+                         "c" ((long)(shmid)),
+                         "d" ((long)(IPC_STAT)),
+                         "S" ((long)(0)),
+                         "D" ((long)(&buf)) );
+    if ( VG_(is_kerror) ( __res ) )
+       return 0;
+ 
+   return buf.shm_segsz;
+}
+ 
+static
+Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
+{
+   UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
+   Char *result = VG_(arena_malloc) ( aid, len );
+   VG_(strcpy) ( result, s1 );
+   VG_(strcat) ( result, s2 );
+   return result;
+}
+
+static 
+void pre_mem_read_sendmsg ( ThreadState* tst, 
+                            Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, base, size );
+
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static 
+void pre_mem_write_recvmsg ( ThreadState* tst, 
+                             Char *msg, UInt base, UInt size )
+{
+   Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_TRANSIENT );
+   SYSCALL_TRACK( pre_mem_write, tst, outmsg, base, size );
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+static
+void post_mem_write_recvmsg ( ThreadState* tst,
+                              Char *fieldName, UInt base, UInt size )
+{
+   VG_TRACK( post_mem_write, base, size );
+}
+ 
+static
+void msghdr_foreachfield ( 
+        ThreadState* tst, 
+        struct msghdr *msg, 
+        void (*foreach_func)( ThreadState*, Char *, UInt, UInt ) 
+     )
+{
+   if ( !msg )
+      return;
+
+   foreach_func ( tst, "(msg)", (Addr)msg, sizeof( struct msghdr ) );
+
+   if ( msg->msg_name )
+      foreach_func ( tst, 
+                     "(msg.msg_name)", 
+                     (Addr)msg->msg_name, msg->msg_namelen );
+
+   if ( msg->msg_iov ) {
+      struct iovec *iov = msg->msg_iov;
+      UInt i;
+
+      foreach_func ( tst, 
+                     "(msg.msg_iov)", 
+                     (Addr)iov, msg->msg_iovlen * sizeof( struct iovec ) );
+
+      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
+         foreach_func ( tst, 
+                        "(msg.msg_iov[i]", 
+                        (Addr)iov->iov_base, iov->iov_len );
+   }
+
+   if ( msg->msg_control )
+      foreach_func ( tst, 
+                     "(msg.msg_control)", 
+                     (Addr)msg->msg_control, msg->msg_controllen );
+}
+
+static
+void pre_mem_read_sockaddr ( ThreadState* tst,
+                                 Char *description,
+                                 struct sockaddr *sa, UInt salen )
+{
+   Char *outmsg = VG_(arena_malloc) ( VG_AR_TRANSIENT, 
+                                      strlen( description ) + 30 );
+
+   VG_(sprintf) ( outmsg, description, ".sa_family" );
+   SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) &sa->sa_family, sizeof (sa_family_t));
+               
+   switch (sa->sa_family) {
+                  
+      case AF_UNIX:
+         VG_(sprintf) ( outmsg, description, ".sun_path" );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, outmsg,
+            (UInt) ((struct sockaddr_un *) sa)->sun_path);
+         break;
+                     
+      case AF_INET:
+         VG_(sprintf) ( outmsg, description, ".sin_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_port,
+            sizeof (((struct sockaddr_in *) sa)->sin_port));
+         VG_(sprintf) ( outmsg, description, ".sin_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in *) sa)->sin_addr,
+            sizeof (struct in_addr));
+         break;
+                           
+      case AF_INET6:
+         VG_(sprintf) ( outmsg, description, ".sin6_port" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_port,
+            sizeof (((struct sockaddr_in6 *) sa)->sin6_port));
+         VG_(sprintf) ( outmsg, description, ".sin6_flowinfo" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_flowinfo,
+            sizeof (uint32_t));
+         VG_(sprintf) ( outmsg, description, ".sin6_addr" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_addr,
+            sizeof (struct in6_addr));
+#        ifndef GLIBC_2_1
+         VG_(sprintf) ( outmsg, description, ".sin6_scope_id" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg,
+            (UInt) &((struct sockaddr_in6 *) sa)->sin6_scope_id,
+            sizeof (uint32_t));
+#        endif
+         break;
+               
+      default:
+         VG_(sprintf) ( outmsg, description, "" );
+         SYSCALL_TRACK( pre_mem_read, tst, outmsg, (UInt) sa, salen );
+         break;
+   }
+   
+   VG_(arena_free) ( VG_AR_TRANSIENT, outmsg );
+}
+
+/* Dereference a pointer to a UInt. */
+static UInt deref_UInt ( ThreadState* tst, Addr a, Char* s )
+{
+   UInt* a_p = (UInt*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(UInt) );
+   if (a_p == NULL)
+      return 0;
+   else
+      return *a_p;
+}
+
+/* Dereference a pointer to a pointer. */
+static Addr deref_Addr ( ThreadState* tst, Addr a, Char* s )
+{
+   Addr* a_p = (Addr*)a;
+   SYSCALL_TRACK( pre_mem_read, tst, s, (Addr)a_p, sizeof(Addr) );
+   return *a_p;
+}
+
+static 
+void buf_and_len_pre_check( ThreadState* tst, Addr buf_p, Addr buflen_p,
+                            Char* buf_s, Char* buflen_s )
+{
+   if (VG_(track_events).pre_mem_write) {
+      UInt buflen_in = deref_UInt( tst, buflen_p, buflen_s);
+      if (buflen_in > 0) {
+         VG_(track_events).pre_mem_write ( Vg_CoreSysCall,
+                                           tst, buf_s, buf_p, buflen_in );
+      }
+   }
+}
+
+static 
+void buf_and_len_post_check( ThreadState* tst, Int res,
+                             Addr buf_p, Addr buflen_p, Char* s )
+{
+   if (!VG_(is_kerror)(res) && VG_(track_events).post_mem_write) {
+      UInt buflen_out = deref_UInt( tst, buflen_p, s);
+      if (buflen_out > 0 && buf_p != (Addr)NULL) {
+         VG_(track_events).post_mem_write ( buf_p, buflen_out );
+      }
+   }
+}
+
+/* ---------------------------------------------------------------------
+   Data seg end, for brk()
+   ------------------------------------------------------------------ */
+
+/* Records the current end of the data segment so we can make sense of
+   calls to brk(). */
+Addr curr_dataseg_end;
+
+void VG_(init_dataseg_end_for_brk) ( void )
+{
+   curr_dataseg_end = (Addr)VG_(brk)(0);
+   if (curr_dataseg_end == (Addr)(-1))
+      VG_(panic)("can't determine data-seg end for brk()");
+   if (0)
+      VG_(printf)("DS END is %p\n", (void*)curr_dataseg_end);
+}
+
+/* ---------------------------------------------------------------------
+   The Main Entertainment ...
+   ------------------------------------------------------------------ */
+
+void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid )
+{
+   ThreadState* tst;
+   UInt         syscallno, arg1, arg2, arg3, arg4, arg5;
+   /* Do not make this unsigned! */
+   Int res;
+   void* pre_res = 0;   /* shut gcc up */
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   syscallno        = tst->m_eax;
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+
+   /* Do any pre-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   /* the syscall no is in %eax.  For syscalls with <= 5 args,
+      args 1 .. 5 to the syscall are in %ebx %ecx %edx %esi %edi.
+      For calls with > 5 args, %ebx points to a lump of memory
+      containing the args.
+
+      The result is returned in %eax.  If this value >= 0, the call
+      succeeded, and this is the return value.  If < 0, it failed, and
+      the negation of this value is errno.  To be more specific, 
+      if res is in the range -EMEDIUMTYPE (-124) .. -EPERM (-1)
+      (kernel 2.4.9 sources, include/asm-i386/errno.h)
+      then it indicates an error.  Otherwise it doesn't.
+
+      Dirk Mueller (mueller@kde.org) says that values -4095 .. -1
+      (inclusive?) indicate error returns.  Not sure where the -4095
+      comes from.
+   */
+
+   MAYBE_PRINTF("SYSCALL[%d,%d](%3d): ", 
+                  VG_(getpid)(), tid, syscallno);
+
+   switch (syscallno) {
+
+      case __NR_exit:
+         VG_(panic)("syscall exit() not caught by the scheduler?!");
+         break;
+
+      case __NR_clone:
+         VG_(unimplemented)
+            ("clone(): not supported by Valgrind.\n   "
+             "We do now support programs linked against\n   "
+             "libpthread.so, though.  Re-run with -v and ensure that\n   "
+             "you are picking up Valgrind's implementation of libpthread.so.");
+         break;
+
+#     if defined(__NR_modify_ldt)
+      case __NR_modify_ldt:
+         VG_(nvidia_moan)();
+         VG_(unimplemented)
+            ("modify_ldt(): I (JRS) haven't investigated this yet; sorry.");
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls !!!!!!!!!!!!!!!!!!!!! */
+
+#     if defined(__NR_vhangup)
+      case __NR_vhangup: /* syscall 111 */
+         /* int vhangup(void); */
+         MAYBE_PRINTF("vhangup()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_iopl)
+      case __NR_iopl: /* syscall 110 */
+         /* int iopl(int level); */
+         MAYBE_PRINTF("iopl ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_getxattr)
+      case __NR_getxattr: /* syscall 229 */
+         /* ssize_t getxattr (const char *path, const char* name,
+                              void* value, size_t size); */
+         MAYBE_PRINTF("getxattr ( %p, %p, %p, %d )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "getxattr(name)", arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "getxattr(value)", arg3, arg4 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0 
+                                  && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write, arg3, res );
+         }
+         break;
+#     endif
+      
+#     if defined(__NR_quotactl)
+      case __NR_quotactl: /* syscall 131 */
+         /* int quotactl(int cmd, char *special, int uid, caddr_t addr); */
+         MAYBE_PRINTF("quotactl (0x%x, %p, 0x%x, 0x%x )\n", 
+                        arg1,arg2,arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "quotactl(special)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_truncate64)
+      case __NR_truncate64: /* syscall 193 */
+         /* int truncate64(const char *path, off64_t length); */
+         MAYBE_PRINTF("truncate64 ( %p, %lld )\n",
+                        arg1, ((ULong)arg2) | (((ULong) arg3) << 32));
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate64(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_fdatasync)
+      case __NR_fdatasync: /* syscall 148 */
+         /* int fdatasync(int fd); */
+         MAYBE_PRINTF("fdatasync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_msync) /* syscall 144 */
+      case __NR_msync:
+         /* int msync(const void *start, size_t length, int flags); */
+         MAYBE_PRINTF("msync ( %p, %d, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "msync(start)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);  
+         break;
+#     endif
+
+#     if defined(__NR_getpmsg) /* syscall 188 */
+      case __NR_getpmsg: 
+      {
+      /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int getpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int *bandp, int *flagsp); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("getpmsg ( %d, %p, %p, %p, %p )\n",
+                      arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(ctrl)", 
+                                (UInt)ctrl->buf, ctrl->maxlen);
+      if (data && data->maxlen > 0)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(data)", 
+                                 (UInt)data->buf, data->maxlen);
+      if (arg4)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(bandp)", 
+                                (UInt)arg4, sizeof(int));
+      if (arg5)
+          SYSCALL_TRACK( pre_mem_write,tst, "getpmsg(flagsp)", 
+                                (UInt)arg5, sizeof(int));
+      KERNEL_DO_SYSCALL(tid,res);
+      if (!VG_(is_kerror)(res) && res == 0 && ctrl && ctrl->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)ctrl->buf, ctrl->len);
+      }
+      if (!VG_(is_kerror)(res) && res == 0 && data && data->len > 0) {
+         VG_TRACK( post_mem_write, (UInt)data->buf, data->len);
+      }
+      }
+      break;
+#     endif
+
+
+#     if defined(__NR_putpmsg) /* syscall 189 */
+      case __NR_putpmsg: 
+      {
+      /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
+      /* int putpmsg(int fd, struct strbuf *ctrl, struct strbuf *data, 
+                             int band, int flags); */
+      struct strbuf {
+         int     maxlen;         /* no. of bytes in buffer */
+         int     len;            /* no. of bytes returned */
+         caddr_t buf;            /* pointer to data */
+      };
+      struct strbuf *ctrl;
+      struct strbuf *data;
+      MAYBE_PRINTF("putpmsg ( %d, %p, %p, %d, %d )\n",
+                     arg1,arg2,arg3,arg4,arg5);
+      ctrl = (struct strbuf *)arg2;
+      data = (struct strbuf *)arg3;
+      if (ctrl && ctrl->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(ctrl)",
+                                (UInt)ctrl->buf, ctrl->len);
+      if (data && data->len > 0)
+          SYSCALL_TRACK( pre_mem_read,tst, "putpmsg(data)",
+                                (UInt)data->buf, data->len);
+      KERNEL_DO_SYSCALL(tid,res);
+      }
+      break;
+#     endif
+
+      case __NR_getitimer: /* syscall 105 */
+         /* int getitimer(int which, struct itimerval *value); */
+         MAYBE_PRINTF("getitimer ( %d, %p )\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getitimer(timer)", arg2, 
+                           sizeof(struct itimerval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg2, sizeof(struct itimerval));
+         }
+         break;
+
+#     if defined(__NR_syslog)
+      case __NR_syslog: /* syscall 103 */
+         /* int syslog(int type, char *bufp, int len); */
+         MAYBE_PRINTF("syslog (%d, %p, %d)\n",arg1,arg2,arg3);
+         switch(arg1) {
+            case 2: case 3: case 4:
+               SYSCALL_TRACK( pre_mem_write, tst, "syslog(buf)", arg2, arg3);
+	       break;
+            default: 
+               break;
+         }
+         KERNEL_DO_SYSCALL(tid, res);
+         if (!VG_(is_kerror)(res)) {
+            switch (arg1) {
+               case 2: case 3: case 4:
+                  VG_TRACK( post_mem_write, arg2, arg3 );
+                  break;
+               default:
+                  break;
+            }
+         }
+         break;
+#     endif
+
+      case __NR_personality: /* syscall 136 */
+         /* int personality(unsigned long persona); */
+         MAYBE_PRINTF("personality ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chroot: /* syscall 61 */
+         /* int chroot(const char *path); */
+         MAYBE_PRINTF("chroot ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chroot(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_madvise)
+      case __NR_madvise: /* syscall 219 */
+         /* int madvise(void *start, size_t length, int advice ); */
+         MAYBE_PRINTF("madvise ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mremap)
+      /* Treating it like an munmap() followed by a mmap() */
+      case __NR_mremap: /* syscall 163 */
+         /* void* mremap(void * old_address, size_t old_size, 
+                         size_t new_size, unsigned long flags); */
+         MAYBE_PRINTF("mremap ( %p, %d, %d, 0x%x )\n", 
+                        arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "mremap(old_address)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mremap_segment( arg1, arg2, (Addr)res, arg3 );
+         }
+         break;         
+#     endif
+
+      case __NR_nice: /* syscall 34 */
+         /* int nice(int inc); */
+         MAYBE_PRINTF("nice ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 14 Mar 02 !!!!!!!!!! */
+
+#     if defined(__NR_setresgid32)
+      case __NR_setresgid32: /* syscall 210 */
+         /* int setresgid(gid_t rgid, gid_t egid, gid_t sgid); */
+         MAYBE_PRINTF("setresgid32 ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid32)
+      case __NR_setfsuid32: /* syscall 215 */
+         /* int setfsuid(uid_t fsuid); */
+          MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+          KERNEL_DO_SYSCALL(tid,res);
+          break;
+#     endif
+
+#     if defined(__NR__sysctl)
+      case __NR__sysctl:
+      /* int _sysctl(struct __sysctl_args *args); */
+         MAYBE_PRINTF("_sysctl ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "_sysctl(args)", arg1, 
+                            sizeof(struct __sysctl_args) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct __sysctl_args) );
+         break;
+#     endif
+
+#     if defined(__NR_sched_getscheduler)
+      case __NR_sched_getscheduler:
+         /* int sched_getscheduler(pid_t pid); */
+         MAYBE_PRINTF("sched_getscheduler ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_sched_setscheduler)
+      case __NR_sched_setscheduler:
+         /* int sched_setscheduler(pid_t pid, int policy, 
+                const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setscheduler ( %d, %d, %p )\n",arg1,arg2,arg3);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst,
+                              "sched_setscheduler(struct sched_param *p)", 
+                              arg3, sizeof(struct sched_param));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlock)
+      case __NR_mlock:
+         /* int mlock(const void * addr, size_t len) */
+         MAYBE_PRINTF("mlock ( %p, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_mlockall)
+      case __NR_mlockall:
+         /* int mlockall(int flags); */
+         MAYBE_PRINTF("mlockall ( %x )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_munlockall)
+      case __NR_munlockall:
+         /* int munlockall(void); */
+         MAYBE_PRINTF("munlockall ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_max)
+      case __NR_sched_get_priority_max:
+         /* int sched_get_priority_max(int policy); */
+         MAYBE_PRINTF("sched_get_priority_max ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_sched_get_priority_min)
+      case __NR_sched_get_priority_min: /* syscall 160 */
+         /* int sched_get_priority_min(int policy); */
+         MAYBE_PRINTF("sched_get_priority_min ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_setpriority)
+      case __NR_setpriority: /* syscall 97 */
+         /* int setpriority(int which, int who, int prio); */
+         MAYBE_PRINTF("setpriority ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#if   defined(__NR_getpriority)
+      case __NR_getpriority: /* syscall 96 */
+         /* int getpriority(int which, int who); */
+         MAYBE_PRINTF("getpriority ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsgid)
+      case __NR_setfsgid: /* syscall 139 */
+         /* int setfsgid(gid_t gid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setregid)
+      case __NR_setregid: /* syscall 71 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid)
+      case __NR_setresuid: /* syscall 164 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setfsuid)
+      case __NR_setfsuid: /* syscall 138 */
+         /* int setfsuid(uid_t uid); */
+         MAYBE_PRINTF("setfsuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 8 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_sendfile)
+      case __NR_sendfile: /* syscall 187 */
+         /* ssize_t sendfile(int out_fd, int in_fd, off_t *offset, 
+                             size_t count) */
+         MAYBE_PRINTF("sendfile ( %d, %d, %p, %d )\n",arg1,arg2,arg3,arg4);
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sendfile(offset)", arg3, sizeof(off_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg3, sizeof( off_t ) );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 7 Mar 02 !!!!!!!!!!! */
+
+#     if defined(__NR_pwrite)
+      case __NR_pwrite: /* syscall 181 */
+         /* ssize_t pwrite (int fd, const void *buf, size_t nbytes,
+                            off_t offset); */
+         MAYBE_PRINTF("pwrite ( %d, %p, %d, %d )\n", arg1, arg2, arg3, arg4);
+         SYSCALL_TRACK( pre_mem_read, tst, "pwrite(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 6 Mar 02 !!!!!!!!!!! */
+
+      case __NR_sync: /* syscall 36 */
+         /* int sync(); */
+         MAYBE_PRINTF("sync ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+ 
+      case __NR_fstatfs: /* syscall 100 */
+         /* int fstatfs(int fd, struct statfs *buf); */
+         MAYBE_PRINTF("fstatfs ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      /* !!!!!!!!!! New, untested syscalls, 4 Mar 02 !!!!!!!!!!! */
+
+      case __NR_pause: /* syscall 29 */
+         /* int pause(void); */
+         MAYBE_PRINTF("pause ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getsid: /* syscall 147 */
+         /* pid_t getsid(pid_t pid); */
+         MAYBE_PRINTF("getsid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_pread)
+      case __NR_pread: /* syscall 180 */
+         /* ssize_t pread(int fd, void *buf, size_t count, off_t offset); */
+         MAYBE_PRINTF("pread ( %d, %p, %d, %d ) ...\n",arg1,arg2,arg3,arg4);
+         SYSCALL_TRACK( pre_mem_write, tst, "pread(buf)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       pread ( %d, %p, %d, %d ) --> %d\n",
+                        VG_(getpid)(),
+                        arg1, arg2, arg3, arg4, res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+#     endif
+
+      /* !!!!!!!!!! New, untested syscalls, 27 Feb 02 !!!!!!!!!! */
+
+      case __NR_mknod: /* syscall 14 */
+         /* int mknod(const char *pathname, mode_t mode, dev_t dev); */
+         MAYBE_PRINTF("mknod ( %p, 0x%x, 0x%x )\n", arg1, arg2, arg3 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mknod(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_flock: /* syscall 143 */
+         /* int flock(int fd, int operation); */
+         MAYBE_PRINTF("flock ( %d, %d )\n", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_rt_sigsuspend)
+      /* Viewed with great suspicion by me, but, hey, let's do it
+         anyway ... */
+      case __NR_rt_sigsuspend: /* syscall 179 */
+         /* int sigsuspend(const sigset_t *mask); */
+         MAYBE_PRINTF("sigsuspend ( %p )\n", arg1 );
+         if (arg1 != (Addr)NULL) {
+            /* above NULL test is paranoia */
+            SYSCALL_TRACK( pre_mem_read, tst, "sigsuspend(mask)", arg1, 
+                              sizeof(vki_ksigset_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_init_module: /* syscall 128 */
+         /* int init_module(const char *name, struct module *image); */
+         MAYBE_PRINTF("init_module ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "init_module(name)", arg1 );
+         SYSCALL_TRACK( pre_mem_read, tst, "init_module(image)", arg2, 
+                           VKI_SIZEOF_STRUCT_MODULE );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ioperm: /* syscall 101 */
+         /* int ioperm(unsigned long from, unsigned long num, int turn_on); */
+         MAYBE_PRINTF("ioperm ( %d, %d, %d )\n", arg1, arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_capget: /* syscall 184 */
+         /* int capget(cap_user_header_t header, cap_user_data_t data); */
+         MAYBE_PRINTF("capget ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "capget(header)", arg1, 
+                                             sizeof(vki_cap_user_header_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "capget(data)", arg2, 
+                                           sizeof( vki_cap_user_data_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg2 != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof( vki_cap_user_data_t) );
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!! mutant ones !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_execve:
+         /* int execve (const char *filename, 
+                        char *const argv [], 
+                        char *const envp[]); */
+         MAYBE_PRINTF("execve ( %p(%s), %p, %p ) --- NOT CHECKED\n", 
+                        arg1, arg1, arg2, arg3);
+         /* Resistance is futile.  Nuke all other threads.  POSIX
+            mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         /* Make any binding for LD_PRELOAD disappear, so that child
+            processes don't get traced into. */
+         if (!VG_(clo_trace_children)) {
+            Int i;
+            Char** envp = (Char**)arg3;
+            Char*  ld_preload_str = NULL;
+            Char*  ld_library_path_str = NULL;
+            for (i = 0; envp[i] != NULL; i++) {
+               if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
+                  ld_preload_str = &envp[i][11];
+               if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
+                  ld_library_path_str = &envp[i][16];
+            }
+            VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
+	       ld_preload_str, ld_library_path_str );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Should we still be alive here?  Don't think so. */
+         /* Actually, above comment is wrong.  execve can fail, just
+            like any other syscall -- typically the file to exec does
+            not exist.  Hence: */
+         vg_assert(VG_(is_kerror)(res));
+         break;
+
+      /* !!!!!!!!!!!!!!!!!!!!!     end     !!!!!!!!!!!!!!!!!!!!! */
+
+      case __NR_access: /* syscall 33 */
+         /* int access(const char *pathname, int mode); */
+         MAYBE_PRINTF("access ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "access(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_alarm: /* syscall 27 */
+         /* unsigned int alarm(unsigned int seconds); */
+         MAYBE_PRINTF("alarm ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_brk: /* syscall 45 */
+         /* Haven't a clue if this is really right. */
+         /* int brk(void *end_data_segment); */
+         MAYBE_PRINTF("brk ( %p ) --> ",arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("0x%x\n", res);
+
+         if (!VG_(is_kerror)(res)) {
+            if (arg1 == 0) {
+               /* Just asking where the current end is. (???) */
+               curr_dataseg_end = res;
+            } else
+            if (arg1 < curr_dataseg_end) {
+               /* shrinking the data segment. */
+               VG_TRACK( die_mem_brk, (Addr)arg1, 
+                                      curr_dataseg_end-arg1 );
+               curr_dataseg_end = arg1;
+            } else
+            if (arg1 > curr_dataseg_end && res != 0) {
+               /* asked for more memory, and got it */
+               /* 
+               VG_(printf)("BRK: new area %x .. %x\n", 
+                           curr_dataseg_end, arg1-1 );
+               */
+               VG_TRACK( new_mem_brk, (Addr)curr_dataseg_end, 
+                                         arg1-curr_dataseg_end );
+               curr_dataseg_end = arg1;         
+            }
+         }
+         break;
+
+      case __NR_chdir: /* syscall 12 */
+         /* int chdir(const char *path); */
+         MAYBE_PRINTF("chdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chdir(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_chmod: /* syscall 15 */
+         /* int chmod(const char *path, mode_t mode); */
+         MAYBE_PRINTF("chmod ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chmod(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_chown32)
+      case __NR_chown32: /* syscall 212 */
+#     endif
+#     if defined(__NR_lchown32)
+      case __NR_lchown32: /* syscall 198 */
+#     endif
+      case __NR_chown: /* syscall 16 */
+         /* int chown(const char *path, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("chown ( %p, 0x%x, 0x%x )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "chown(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_close: /* syscall 6 */
+         /* int close(int fd); */
+         MAYBE_PRINTF("close ( %d )\n",arg1);
+         /* Detect and negate attempts by the client to close Valgrind's
+            logfile fd ... */
+         if (arg1 == VG_(clo_logfile_fd)) {
+            VG_(message)(Vg_UserMsg, 
+              "Warning: client attempted to close "
+               "Valgrind's logfile fd (%d).", 
+               VG_(clo_logfile_fd));
+            VG_(message)(Vg_UserMsg, 
+              "   Use --logfile-fd=<number> to select an "
+              "alternative logfile fd." );
+            /* Pretend the close succeeded, regardless.  (0 == success) */
+            res = 0;
+            SET_EAX(tid, res);
+         } else {
+            KERNEL_DO_SYSCALL(tid,res);
+         }
+         break;
+
+      case __NR_dup: /* syscall 41 */
+         /* int dup(int oldfd); */
+         MAYBE_PRINTF("dup ( %d ) --> ", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n", res);
+         break;
+
+      case __NR_dup2: /* syscall 63 */
+         /* int dup2(int oldfd, int newfd); */
+         MAYBE_PRINTF("dup2 ( %d, %d ) ...\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("SYSCALL[%d]       dup2 ( %d, %d ) = %d\n", 
+                        VG_(getpid)(), 
+                        arg1, arg2, res);
+         break;
+
+      case __NR_fcntl: /* syscall 55 */
+         /* int fcntl(int fd, int cmd, int arg); */
+         MAYBE_PRINTF("fcntl ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchdir: /* syscall 133 */
+         /* int fchdir(int fd); */
+         MAYBE_PRINTF("fchdir ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fchown32)
+      case __NR_fchown32: /* syscall 207 */
+#     endif
+      case __NR_fchown: /* syscall 95 */
+         /* int fchown(int filedes, uid_t owner, gid_t group); */
+         MAYBE_PRINTF("fchown ( %d, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_fchmod: /* syscall 94 */
+         /* int fchmod(int fildes, mode_t mode); */
+         MAYBE_PRINTF("fchmod ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_fcntl64)
+      case __NR_fcntl64: /* syscall 221 */
+         /* I don't know what the prototype for this is supposed to be. */
+         /* ??? int fcntl(int fd, int cmd); */
+         MAYBE_PRINTF("fcntl64 (?!) ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_fstat: /* syscall 108 */
+         /* int fstat(int filedes, struct stat *buf); */
+         MAYBE_PRINTF("fstat ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_vfork: /* syscall 190 */
+         /* pid_t vfork(void); */
+         MAYBE_PRINTF("vfork ( ) ... becomes ... ");
+         /* KLUDGE: we prefer to do a fork rather than vfork. 
+            vfork gives a SIGSEGV, and the stated semantics looks
+            pretty much impossible for us. */
+         tst->m_eax = __NR_fork;
+         /* fall through ... */
+      case __NR_fork: /* syscall 2 */
+         /* pid_t fork(void); */
+         MAYBE_PRINTF("fork ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         if (res == 0) {
+            /* I am the child.  Nuke all other threads which I might
+               have inherited from my parent.  POSIX mandates this. */
+            VG_(nuke_all_threads_except)( tid );
+         }
+         break;
+
+      case __NR_fsync: /* syscall 118 */
+         /* int fsync(int fd); */
+         MAYBE_PRINTF("fsync ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_ftruncate: /* syscall 93 */
+         /* int ftruncate(int fd, size_t length); */
+         MAYBE_PRINTF("ftruncate ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_ftruncate64)
+      case __NR_ftruncate64: /* syscall 194 */
+         /* int ftruncate64(int fd, off64_t length); */
+         MAYBE_PRINTF("ftruncate64 ( %d, %lld )\n", 
+                        arg1,arg2|((long long) arg3 << 32));
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getdents: /* syscall 141 */
+         /* int getdents(unsigned int fd, struct dirent *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+#     if defined(__NR_getdents64)
+      case __NR_getdents64: /* syscall 220 */
+         /* int getdents(unsigned int fd, struct dirent64 *dirp, 
+                         unsigned int count); */
+         MAYBE_PRINTF("getdents64 ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getdents64(dirp)", arg2, arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+#     endif
+
+#     if defined(__NR_getgroups32)
+      case __NR_getgroups32: /* syscall 205 */
+#     endif
+      case __NR_getgroups: /* syscall 80 */
+         /* int getgroups(int size, gid_t list[]); */
+         MAYBE_PRINTF("getgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "getgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (arg1 > 0 && !VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res * sizeof(gid_t) );
+         break;
+
+      case __NR_getcwd: /* syscall 183 */
+         /* char *getcwd(char *buf, size_t size); */
+         MAYBE_PRINTF("getcwd ( %p, %d )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getcwd(buf)", arg1, arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res != (Addr)NULL)
+            VG_TRACK( post_mem_write, arg1, arg2 );
+         /* Not really right -- really we should have the asciiz
+            string starting at arg1 readable, or up to arg2 bytes,
+            whichever finishes first. */
+         break;
+
+      case __NR_geteuid: /* syscall 49 */
+         /* uid_t geteuid(void); */
+         MAYBE_PRINTF("geteuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_geteuid32)
+      case __NR_geteuid32: /* syscall 201 */
+         /* ?? uid_t geteuid32(void); */
+         MAYBE_PRINTF("geteuid32(?) ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getegid: /* syscall 50 */
+         /* gid_t getegid(void); */
+         MAYBE_PRINTF("getegid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getegid32)
+      case __NR_getegid32: /* syscall 202 */
+         /* gid_t getegid32(void); */
+         MAYBE_PRINTF("getegid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getgid: /* syscall 47 */
+         /* gid_t getgid(void); */
+         MAYBE_PRINTF("getgid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getgid32)
+      case __NR_getgid32: /* syscall 200 */
+         /* gid_t getgid32(void); */
+         MAYBE_PRINTF("getgid32 ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_getpid: /* syscall 20 */
+         /* pid_t getpid(void); */
+         MAYBE_PRINTF("getpid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgid: /* syscall 132 */
+         /* pid_t getpgid(pid_t pid); */
+         MAYBE_PRINTF("getpgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getpgrp: /* syscall 65 */
+         /* pid_t getpgrp(void); */
+         MAYBE_PRINTF("getpgrp ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getppid: /* syscall 64 */
+         /* pid_t getppid(void); */
+         MAYBE_PRINTF("getppid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_getresgid: /* syscall 171 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresgid32)
+      case __NR_getresgid32: /* syscall 211 */
+         /* int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); */
+         MAYBE_PRINTF("getresgid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(rgid)", arg1, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(egid)", arg2, sizeof(gid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresgid32(sgid)", arg3, sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(gid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(gid_t) );
+         }
+         break;
+#     endif
+
+      case __NR_getresuid: /* syscall 165 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+
+#     if defined(__NR_getresuid32)
+      case __NR_getresuid32: /* syscall 209 */
+         /* int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); */
+         MAYBE_PRINTF("getresuid32 ( %p, %p, %p )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(ruid)", arg1, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(euid)", arg2, sizeof(uid_t) );
+         SYSCALL_TRACK( pre_mem_write, tst, "getresuid32(suid)", arg3, sizeof(uid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg2, sizeof(uid_t) );
+            VG_TRACK( post_mem_write, arg3, sizeof(uid_t) );
+         }
+         break;
+#     endif
+
+#     if defined(__NR_ugetrlimit)
+      case __NR_ugetrlimit: /* syscall 191 */
+#     endif
+      case __NR_getrlimit: /* syscall 76 */
+         /* int getrlimit (int resource, struct rlimit *rlim); */
+         MAYBE_PRINTF("getrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrlimit(rlim)", arg2, 
+                           sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct rlimit) );
+         break;
+
+      case __NR_getrusage: /* syscall 77 */
+         /* int getrusage (int who, struct rusage *usage); */
+         MAYBE_PRINTF("getrusage ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "getrusage(usage)", arg2, 
+                           sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write,arg2, sizeof(struct rusage) );
+         break;
+
+      case __NR_gettimeofday: /* syscall 78 */
+         /* int gettimeofday(struct timeval *tv, struct timezone *tz); */
+         MAYBE_PRINTF("gettimeofday ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tv)", arg1, 
+                           sizeof(struct timeval) );
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_write, tst, "gettimeofday(tz)", arg2, 
+                              sizeof(struct timezone) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct timeval) );
+            if (arg2 != 0)
+               VG_TRACK( post_mem_write, arg2, sizeof(struct timezone) );
+         }
+         break;
+
+      case __NR_getuid: /* syscall 24 */
+         /* uid_t getuid(void); */
+         MAYBE_PRINTF("getuid ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_getuid32)
+      case __NR_getuid32: /* syscall 199 */
+         /* ???uid_t getuid32(void); */
+         MAYBE_PRINTF("getuid32 ( )\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+      case __NR_ipc: /* syscall 117 */
+         /* int ipc ( unsigned int call, int first, int second, 
+                      int third, void *ptr, long fifth); */
+         {
+         UInt arg6 = tst->m_ebp;
+
+         MAYBE_PRINTF("ipc ( %d, %d, %d, %d, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5,arg6);
+         switch (arg1 /* call */) {
+            case 1: /* IPCOP_semop */
+               SYSCALL_TRACK( pre_mem_read, tst, "semop(sops)", arg5, 
+                                  arg3 * sizeof(struct sembuf) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 2: /* IPCOP_semget */
+            case 3: /* IPCOP_semctl */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 11: /* IPCOP_msgsnd */
+               {
+                  struct msgbuf *msgp = (struct msgbuf *)arg5;
+                  Int msgsz = arg3;
+
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_read, tst, "msgsnd(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+            case 12: /* IPCOP_msgrcv */
+               {
+                  struct msgbuf *msgp;
+                  Int msgsz = arg3;
+ 
+                  msgp = (struct msgbuf *)deref_Addr( tst,
+                            (Addr) (&((struct ipc_kludge *)arg5)->msgp),
+                            "msgrcv(msgp)" );
+
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtype)", 
+                                     (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                  SYSCALL_TRACK( pre_mem_write, tst, "msgrcv(msgp->mtext)", 
+                                     (UInt)msgp->mtext, msgsz );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)(res) && res > 0 ) {
+                     VG_TRACK( post_mem_write, (UInt)&msgp->mtype, sizeof(msgp->mtype) );
+                     VG_TRACK( post_mem_write, (UInt)msgp->mtext, res );
+                  }
+                  break;
+               }
+            case 13: /* IPCOP_msgget */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case 14: /* IPCOP_msgctl */
+               {
+                  switch (arg3 /* cmd */) {
+                     case IPC_STAT:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid_ds) );
+                        }
+                        break;
+                     case IPC_SET:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    if defined(IPC_64)
+                     case IPC_STAT|IPC_64:
+                        SYSCALL_TRACK( pre_mem_write, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        if ( !VG_(is_kerror)(res) && res > 0 ) {
+                           VG_TRACK( post_mem_write, arg5, sizeof(struct msqid64_ds) );
+                        }
+                        break;
+#                    endif
+#                    if defined(IPC_64)
+                     case IPC_SET|IPC_64:
+                        SYSCALL_TRACK( pre_mem_read, tst, "msgctl(buf)", arg5, 
+                                           sizeof(struct msqid64_ds) );
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+#                    endif
+                     default:
+                        KERNEL_DO_SYSCALL(tid,res);
+                        break;
+                  }
+                  break;
+               }
+            case 21: /* IPCOP_shmat */
+               {
+                  Int shmid = arg2;
+                  /*Int shmflag = arg3;*/
+                  Addr addr;
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( VG_(is_kerror) ( res ) )
+                     break;
+                  
+                  /* force readability. before the syscall it is
+                   * indeed uninitialized, as can be seen in
+                   * glibc/sysdeps/unix/sysv/linux/shmat.c */
+                  VG_TRACK( post_mem_write, arg4, sizeof( ULong ) );
+
+                  addr = deref_Addr ( tst, arg4, "shmat(addr)" );
+                  if ( addr > 0 ) { 
+                     UInt segmentSize = get_shm_size ( shmid );
+                     if ( segmentSize > 0 ) {
+                        /* we don't distinguish whether it's read-only or
+                         * read-write -- it doesn't matter really. */
+                        VG_TRACK( post_mem_write, addr, segmentSize );
+                     }
+                  }
+                  break;
+               }
+            case 22: /* IPCOP_shmdt */
+                  KERNEL_DO_SYSCALL(tid,res);
+                  /* ### FIXME: this should call make_noaccess on the
+                   * area passed to shmdt. But there's no way to
+                   * figure out the size of the shared memory segment
+                   * just from the address...  Maybe we want to keep a
+                   * copy of the exiting mappings inside valgrind? */
+                  break;
+            case 23: /* IPCOP_shmget */
+                KERNEL_DO_SYSCALL(tid,res);
+                break;
+            case 24: /* IPCOP_shmctl */
+	      /* Subject: shmctl: The True Story
+                    Date: Thu, 9 May 2002 18:07:23 +0100 (BST)
+                    From: Reuben Thomas <rrt@mupsych.org>
+                      To: Julian Seward <jseward@acm.org>
+
+                 1. As you suggested, the syscall subop is in arg1.
+
+                 2. There are a couple more twists, so the arg order
+                    is actually:
+
+                 arg1 syscall subop
+                 arg2 file desc
+                 arg3 shm operation code (can have IPC_64 set)
+                 arg4 0 ??? is arg3-arg4 a 64-bit quantity when IPC_64
+                        is defined?
+                 arg5 pointer to buffer
+
+                 3. With this in mind, I've amended the case as below:
+	      */
+               {
+                  UInt cmd = arg3;
+                  Bool out_arg = False;
+                  if ( arg5 ) {
+#                    if defined(IPC_64)
+                     cmd = cmd & (~IPC_64);
+#                    endif
+                     out_arg = cmd == SHM_STAT || cmd == IPC_STAT;
+                     if ( out_arg )
+                        SYSCALL_TRACK( pre_mem_write, tst, 
+                           "shmctl(SHM_STAT or IPC_STAT,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                     else
+                        SYSCALL_TRACK( pre_mem_read, tst, 
+                           "shmctl(SHM_XXXX,buf)", 
+                           arg5, sizeof(struct shmid_ds) );
+                  }
+                  KERNEL_DO_SYSCALL(tid,res);
+                  if ( arg5 && !VG_(is_kerror)(res) && res == 0 && out_arg )
+                          VG_TRACK( post_mem_write, arg5, sizeof(struct shmid_ds) );
+               }
+               break;
+            default:
+               VG_(message)(Vg_DebugMsg,
+                            "FATAL: unhandled syscall(ipc) %d",
+                            arg1 );
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         }
+         break;
+
+      case __NR_ioctl: /* syscall 54 */
+         /* int ioctl(int d, int request, ...)
+            [The  "third"  argument  is traditionally char *argp, 
+             and will be so named for this discussion.]
+         */
+         /*
+         VG_(message)(
+            Vg_DebugMsg, 
+            "is an IOCTL,  request = 0x%x,   d = %d,   argp = 0x%x", 
+            arg2,arg1,arg3);
+         */
+         MAYBE_PRINTF("ioctl ( %d, 0x%x, %p )\n",arg1,arg2,arg3);
+         switch (arg2 /* request */) {
+            case TCSETS:
+            case TCSETSW:
+            case TCSETSF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{S,SW,SF})", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               break; 
+            case TCGETS:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETS)", arg3, 
+                                 VKI_SIZEOF_STRUCT_TERMIOS );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIOS );
+               break;
+            case TCSETA:
+            case TCSETAW:
+            case TCSETAF:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TCSET{A,AW,AF})", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TCGETA:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TCGETA)", arg3,
+                                 VKI_SIZEOF_STRUCT_TERMIO );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, VKI_SIZEOF_STRUCT_TERMIO );
+               break;
+            case TCSBRK:
+            case TCXONC:
+            case TCSBRKP:
+            case TCFLSH:
+               /* These just take an int by value */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGWINSZ:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(struct winsize) );
+               break;
+            case TIOCSWINSZ:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSWINSZ)", arg3, 
+                                 sizeof(struct winsize) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCGPGRP:
+               /* Get process group ID for foreground processing group. */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(TIOCGPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(pid_t) );
+               break;
+            case TIOCSPGRP:
+               /* Set foreground process group ID.  This is a "set" ioctl:
+                  the kernel READS the pid_t that arg3 points at and does
+                  not write through it, so track a read (the old code
+                  tracked a write and carried a copy-pasted TIOCGPGRP
+                  label). */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPGRP)", arg3,
+                                 sizeof(pid_t) );
+               KERNEL_DO_SYSCALL(tid,res); 
+               break;
+            case TIOCGPTN: /* Get Pty Number (of pty-mux device) */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(TIOCGPTN)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write, arg3, sizeof(int));
+               break;
+            case TIOCSCTTY:
+               /* Just takes an int value.  */
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case TIOCSPTLCK: /* Lock/unlock Pty */
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(TIOCSPTLCK)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONBIO:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIONBIO)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIOASYNC:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(FIOASYNC)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case FIONREAD:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(FIONREAD)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(int) );
+               break;
+
+            /* If you get compilation problems here, change the #if
+               1 to #if 0 and get rid of <scsi/sg.h> in
+               vg_unsafe.h. */
+#       if 1
+            case SG_SET_COMMAND_Q:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_COMMAND_Q)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#           if defined(SG_IO)
+            case SG_IO:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_IO)", arg3, 
+                                 sizeof(struct sg_io_hdr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_io_hdr));
+               break;
+#           endif /* SG_IO */
+            case SG_GET_SCSI_ID:
+               /* Note: sometimes sg_scsi_id is called sg_scsi_id_t */
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_SCSI_ID)", arg3, 
+                                 sizeof(struct sg_scsi_id) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct sg_scsi_id));
+               break;
+            case SG_SET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_RESERVED_SIZE)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_SET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_SET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SG_GET_RESERVED_SIZE:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_RESERVED_SIZE)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_TIMEOUT:
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(SG_GET_TIMEOUT)", arg3, 
+                                 sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SG_GET_VERSION_NUM:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SG_GET_VERSION_NUM)", 
+                                 arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+#       endif
+
+            case IIOCGETCPS:
+               /* In early 2.4 kernels, ISDN_MAX_CHANNELS was only defined
+                * when KERNEL was. I never saw a larger value than 64 though */
+#              ifndef ISDN_MAX_CHANNELS
+#              define ISDN_MAX_CHANNELS 64
+#              endif
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCGETCPS)", arg3,
+                                 ISDN_MAX_CHANNELS 
+                                 * 2 * sizeof(unsigned long) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, ISDN_MAX_CHANNELS 
+                                        * 2 * sizeof(unsigned long) );
+               break;
+            case IIOCNETGPN:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(IIOCNETGPN)",
+                                 (UInt)&((isdn_net_ioctl_phone *)arg3)->name,
+                                 sizeof(((isdn_net_ioctl_phone *)arg3)->name) );
+               SYSCALL_TRACK( pre_mem_write, tst, "ioctl(IIOCNETGPN)", arg3,
+                                 sizeof(isdn_net_ioctl_phone) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write, arg3, sizeof(isdn_net_ioctl_phone) );
+               break;
+
+            /* These all use struct ifreq AFAIK */
+            case SIOCGIFINDEX:
+            case SIOCGIFFLAGS:        /* get flags                    */
+            case SIOCGIFHWADDR:       /* Get hardware address         */
+            case SIOCGIFMTU:          /* get MTU size                 */
+            case SIOCGIFADDR:         /* get PA address               */
+            case SIOCGIFNETMASK:      /* get network PA mask          */
+            case SIOCGIFMETRIC:       /* get metric                   */
+            case SIOCGIFMAP:          /* Get device parameters        */
+            case SIOCGIFTXQLEN:       /* Get the tx queue length      */
+            case SIOCGIFDSTADDR:      /* get remote PA address        */
+            case SIOCGIFBRDADDR:      /* get broadcast PA address     */
+            case SIOCGIFNAME:         /* get iface name               */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFINDEX)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifreq));
+               break;
+            case SIOCGIFCONF:         /* get iface list               */
+               /* WAS:
+               SYSCALL_TRACK( pre_mem_write,"ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct ifconf));
+               */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCGIFCONF)", arg3, 
+                                sizeof(struct ifconf));
+               if ( arg3 ) {
+                  // TODO len must be readable and writable
+                  // buf pointer only needs to be readable
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGIFCONF).ifc_buf",
+                                   (Addr)(ifc->ifc_buf), (UInt)(ifc->ifc_len) );
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0 && arg3 ) {
+                  struct ifconf *ifc = (struct ifconf *) arg3;
+                  if (ifc->ifc_buf != NULL)
+                     VG_TRACK( post_mem_write, (Addr)(ifc->ifc_buf), 
+                                     (UInt)(ifc->ifc_len) );
+               }
+               break;
+            case SIOCGSTAMP:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGSTAMP)", arg3, 
+                                sizeof(struct timeval));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct timeval));
+               break;
+            case SIOCGRARP:           /* get RARP table entry         */
+            case SIOCGARP:            /* get ARP table entry          */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SIOCGARP)", arg3, 
+                                sizeof(struct arpreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct arpreq));
+               break;
+                    
+            case SIOCSIFFLAGS:        /* set flags                    */
+            case SIOCSIFMAP:          /* Set device parameters        */
+            case SIOCSIFTXQLEN:       /* Set the tx queue length      */
+            case SIOCSIFDSTADDR:      /* set remote PA address        */
+            case SIOCSIFBRDADDR:      /* set broadcast PA address     */
+            case SIOCSIFNETMASK:      /* set network PA mask          */
+            case SIOCSIFMETRIC:       /* set metric                   */
+            case SIOCSIFADDR:         /* set PA address               */
+            case SIOCSIFMTU:          /* set MTU size                 */
+            case SIOCSIFHWADDR:       /* set hardware address         */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCSIFFLAGS)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            /* Routing table calls.  */
+            case SIOCADDRT:           /* add routing table entry      */
+            case SIOCDELRT:           /* delete routing table entry   */
+               SYSCALL_TRACK( pre_mem_read,tst,"ioctl(SIOCADDRT/DELRT)", arg3, 
+                                sizeof(struct rtentry));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* RARP cache control calls. */
+            case SIOCDRARP:           /* delete RARP table entry      */
+            case SIOCSRARP:           /* set RARP table entry         */
+            /* ARP cache control calls. */
+            case SIOCSARP:            /* set ARP table entry          */
+            case SIOCDARP:            /* delete ARP table entry       */
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SIOCSIFFLAGS)", arg3, 
+                                sizeof(struct ifreq));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SIOCSPGRP:
+               SYSCALL_TRACK( pre_mem_read, tst, "ioctl(SIOCSPGRP)", arg3, sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* linux/soundcard interface (OSS) */
+            case SNDCTL_SEQ_GETOUTCOUNT:
+            case SNDCTL_SEQ_GETINCOUNT:
+            case SNDCTL_SEQ_PERCMODE:
+            case SNDCTL_SEQ_TESTMIDI:
+            case SNDCTL_SEQ_RESETSAMPLES:
+            case SNDCTL_SEQ_NRSYNTHS:
+            case SNDCTL_SEQ_NRMIDIS:
+            case SNDCTL_SEQ_GETTIME:
+            case SNDCTL_DSP_GETFMTS:
+            case SNDCTL_DSP_GETTRIGGER:
+            case SNDCTL_DSP_GETODELAY:
+#           if defined(SNDCTL_DSP_GETSPDIF)
+            case SNDCTL_DSP_GETSPDIF:
+#           endif
+            case SNDCTL_DSP_GETCAPS:
+            case SOUND_PCM_READ_RATE:
+            case SOUND_PCM_READ_CHANNELS:
+            case SOUND_PCM_READ_BITS:
+            case (SOUND_PCM_READ_BITS|0x40000000): /* what the fuck ? */
+            case SOUND_PCM_READ_FILTER:
+               SYSCALL_TRACK( pre_mem_write,tst,"ioctl(SNDCTL_XXX|SOUND_XXX (SIOR, int))", 
+                                arg3,
+                                sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(int));
+               break;
+            case SNDCTL_SEQ_CTRLRATE:
+            case SNDCTL_DSP_SPEED:
+            case SNDCTL_DSP_STEREO:
+            case SNDCTL_DSP_GETBLKSIZE: 
+            case SNDCTL_DSP_CHANNELS:
+            case SOUND_PCM_WRITE_FILTER:
+            case SNDCTL_DSP_SUBDIVIDE:
+            case SNDCTL_DSP_SETFRAGMENT:
+#           if defined(SNDCTL_DSP_GETCHANNELMASK)
+            case SNDCTL_DSP_GETCHANNELMASK:
+#           endif
+#           if defined(SNDCTL_DSP_BIND_CHANNEL)
+            case SNDCTL_DSP_BIND_CHANNEL:
+#           endif
+            case SNDCTL_TMR_TIMEBASE:
+            case SNDCTL_TMR_TEMPO:
+            case SNDCTL_TMR_SOURCE:
+            case SNDCTL_MIDI_PRETIME:
+            case SNDCTL_MIDI_MPUMODE:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                     "(SIOWR, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case SNDCTL_DSP_GETOSPACE:
+            case SNDCTL_DSP_GETISPACE:
+               SYSCALL_TRACK( pre_mem_write,tst, 
+                                "ioctl(SNDCTL_XXX|SOUND_XXX "
+                                "(SIOR, audio_buf_info))", arg3,
+                                sizeof(audio_buf_info));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(audio_buf_info));
+               break;
+            case SNDCTL_DSP_SETTRIGGER:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(SNDCTL_XXX|SOUND_XXX (SIOW, int))", 
+                                arg3, sizeof(int));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            /* Real Time Clock (/dev/rtc) ioctls */
+#           ifndef GLIBC_2_1
+            case RTC_UIE_ON:
+            case RTC_UIE_OFF:
+            case RTC_AIE_ON:
+            case RTC_AIE_OFF:
+            case RTC_PIE_ON:
+            case RTC_PIE_OFF:
+            case RTC_IRQP_SET:
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_RD_TIME:
+            case RTC_ALM_READ:
+               /* Kernel fills in a struct rtc_time at arg3. */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_RD_TIME/ALM_READ)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* Bug fix: was "!VG_(is_kerror)" (address of the function,
+                  never calls it) -- must apply it to res like every other
+                  arm of this switch. */
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(struct rtc_time));
+               break;
+            case RTC_ALM_SET:
+               SYSCALL_TRACK( pre_mem_read,tst, "ioctl(RTC_ALM_SET)", arg3,
+                                sizeof(struct rtc_time));
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+            case RTC_IRQP_READ:
+               /* Kernel writes the periodic-interrupt rate (an unsigned
+                  long) through arg3. */
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(RTC_IRQP_READ)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               /* Bug fix: was "!VG_(is_kerror)" without (res) -- the bare
+                  function designator is always non-NULL, so the guard was
+                  constant. */
+               if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+               break;
+#           endif /* GLIBC_2_1 */
+
+#           ifdef BLKGETSIZE
+            case BLKGETSIZE:
+               SYSCALL_TRACK( pre_mem_write,tst, "ioctl(BLKGETSIZE)", arg3,
+                                sizeof(unsigned long));
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res == 0)
+                  VG_TRACK( post_mem_write,arg3, sizeof(unsigned long));
+               break;
+#           endif /* BLKGETSIZE */
+
+            /* CD ROM stuff (??)  */
+            case CDROMSUBCHNL:
+                SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMSUBCHNL (cdsc_format, char))",
+                   (int) &(((struct cdrom_subchnl *) arg3)->cdsc_format), 
+                   sizeof(((struct cdrom_subchnl *) arg3)->cdsc_format));
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMSUBCHNL)", arg3, 
+                   sizeof(struct cdrom_subchnl));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_subchnl));
+                break;
+            case CDROMREADTOCHDR:
+                SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCHDR)", arg3, 
+                   sizeof(struct cdrom_tochdr));
+                KERNEL_DO_SYSCALL(tid,res);
+                if (!VG_(is_kerror)(res) && res == 0)
+                   VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tochdr));
+                break;
+            case CDROMREADTOCENTRY:
+                 /* Caller must have initialised cdte_format and cdte_track;
+                    the kernel then fills in the rest of the
+                    struct cdrom_tocentry. */
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_format, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_format), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_format));
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMREADTOCENTRY (cdte_track, char))",
+                    (int) &(((struct cdrom_tocentry *) arg3)->cdte_track), 
+                    sizeof(((struct cdrom_tocentry *) arg3)->cdte_track));
+                 SYSCALL_TRACK( pre_mem_write,tst, "ioctl(CDROMREADTOCENTRY)", arg3, 
+                    sizeof(struct cdrom_tocentry));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 /* Bug fix: post size was sizeof(struct cdrom_tochdr),
+                    copy-pasted from the CDROMREADTOCHDR arm above; the
+                    buffer written here is a struct cdrom_tocentry. */
+                 if (!VG_(is_kerror)(res) && res == 0)
+                    VG_TRACK( post_mem_write,arg3, sizeof(struct cdrom_tocentry));
+                 break;
+            case CDROMPLAYMSF:
+                 SYSCALL_TRACK( pre_mem_read,tst, "ioctl(CDROMPLAYMSF)", arg3, 
+                    sizeof(struct cdrom_msf));
+                 KERNEL_DO_SYSCALL(tid,res);
+                 break;
+            /* We don't have any specific information on it, so
+               try to do something reasonable based on direction and
+               size bits.  The encoding scheme is described in
+               /usr/include/asm/ioctl.h.  
+
+               According to Simon Hausmann, _IOC_READ means the kernel
+               writes a value to the ioctl value passed from the user
+               space and the other way around with _IOC_WRITE. */
+            default: {
+               UInt dir  = _IOC_DIR(arg2);
+               UInt size = _IOC_SIZE(arg2);
+               if (/* size == 0 || */ dir == _IOC_NONE) {
+                  VG_(message)(Vg_UserMsg, 
+                     "Warning: noted but unhandled ioctl 0x%x"
+                     " with no size/direction hints",
+                     arg2); 
+                  VG_(message)(Vg_UserMsg, 
+                     "   This could cause spurious value errors"
+                     " to appear.");
+                  VG_(message)(Vg_UserMsg, 
+                     "   See README_MISSING_SYSCALL_OR_IOCTL for guidance on"
+                     " writing a proper wrapper." );
+               } else {
+                  if ((dir & _IOC_WRITE) && size > 0)
+                     SYSCALL_TRACK( pre_mem_read,tst, "ioctl(generic)", arg3, size);
+                  if ((dir & _IOC_READ) && size > 0)
+                     SYSCALL_TRACK( pre_mem_write,tst, "ioctl(generic)", arg3, size);
+               }
+               KERNEL_DO_SYSCALL(tid,res);
+               if (size > 0 && (dir & _IOC_READ)
+                   && !VG_(is_kerror)(res) && res == 0
+                   && arg3 != (Addr)NULL)
+                  VG_TRACK( post_mem_write,arg3, size);
+               break;
+            }
+         }
+         break;
+
+      case __NR_kill: /* syscall 37 */
+         /* int kill(pid_t pid, int sig); */
+         MAYBE_PRINTF("kill ( %d, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_link: /* syscall 9 */
+         /* int link(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("link ( %p, %p)\n", arg1, arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(oldpath)", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "link(newpath)", arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_lseek: /* syscall 19 */
+         /* off_t lseek(int fildes, off_t offset, int whence); */
+         MAYBE_PRINTF("lseek ( %d, %d, %d )\n",arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR__llseek: /* syscall 140 */
+         /* int _llseek(unsigned int fd, unsigned long offset_high,       
+                        unsigned long  offset_low, 
+                        loff_t * result, unsigned int whence); */
+         MAYBE_PRINTF("llseek ( %d, 0x%x, 0x%x, %p, %d )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         SYSCALL_TRACK( pre_mem_write, tst, "llseek(result)", arg4, sizeof(loff_t));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0)
+            VG_TRACK( post_mem_write, arg4, sizeof(loff_t) );
+         break;
+
+      case __NR_lstat: /* syscall 107 */
+         /* int lstat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         }
+         break;
+
+#     if defined(__NR_lstat64)
+      case __NR_lstat64: /* syscall 196 */
+         /* int lstat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "lstat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "lstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res == 0) {
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         }
+         break;
+#     endif
+
+      case __NR_mkdir: /* syscall 39 */
+         /* int mkdir(const char *pathname, mode_t mode); */
+         MAYBE_PRINTF("mkdir ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "mkdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_mmap2)
+      case __NR_mmap2: /* syscall 192 */
+         /* My impression is that this is exactly like __NR_mmap 
+            except that all 6 args are passed in regs, rather than in 
+            a memory-block. */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         if (VG_(clo_trace_syscalls)) {
+            UInt arg6 = tst->m_ebp;
+            VG_(printf)("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+#     endif
+
+      case __NR_mmap: /* syscall 90 */
+         /* void* mmap(void *start, size_t length, int prot, 
+                       int flags, int fd, off_t offset); 
+         */
+         SYSCALL_TRACK( pre_mem_read, tst, "mmap(args)", arg1, 6*sizeof(UInt) );
+         {
+            UInt* arg_block = (UInt*)arg1;
+            UInt arg6;
+            arg1 = arg_block[0];
+            arg2 = arg_block[1];
+            arg3 = arg_block[2];
+            arg4 = arg_block[3];
+            arg5 = arg_block[4];
+            arg6 = arg_block[5];
+            MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
+                        arg1, arg2, arg3, arg4, arg5, arg6 );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mmap_segment( (Addr)res, arg2, arg3, arg5 );
+         }
+         break;
+
+      case __NR_mprotect: /* syscall 125 */
+         /* int mprotect(const void *addr, size_t len, int prot); */
+         /* should addr .. addr+len-1 be checked before the call? */
+         MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            mprotect_segment( arg1, arg2, arg3 );
+         }
+         break;
+
+      case __NR_munmap: /* syscall 91 */
+         /* int munmap(void *start, size_t length); */
+         /* should start .. start+length-1 be checked before the call? */
+         MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            munmap_segment( arg1, arg2 );
+         }
+         break;
+
+      case __NR_nanosleep: /* syscall 162 */
+         /* int nanosleep(const struct timespec *req, struct timespec *rem); */
+         MAYBE_PRINTF("nanosleep ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "nanosleep(req)", arg1, 
+                                              sizeof(struct timespec) );
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "nanosleep(rem)", arg2, 
+                               sizeof(struct timespec) );
+         KERNEL_DO_SYSCALL(tid,res);
+         /* Somewhat bogus ... is only written by the kernel if
+            res == -1 && errno == EINTR. */
+         if (!VG_(is_kerror)(res) && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(struct timespec) );
+         break;
+
+      case __NR__newselect: /* syscall 142 */
+         /* int select(int n,  
+                       fd_set *readfds, fd_set *writefds, fd_set *exceptfds, 
+                       struct timeval *timeout);
+         */
+         MAYBE_PRINTF("newselect ( %d, %p, %p, %p, %p )\n",
+                        arg1,arg2,arg3,arg4,arg5);
+         if (arg2 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(readfds)",   
+                              arg2, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg3 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(writefds)",  
+                              arg3, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg4 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(exceptfds)", 
+                              arg4, arg1/8 /* __FD_SETSIZE/8 */ );
+         if (arg5 != 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "newselect(timeout)", arg5, 
+                              sizeof(struct timeval) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+         
+      case __NR_open: /* syscall 5 */
+         /* int open(const char *pathname, int flags); */
+         MAYBE_PRINTF("open ( %p(%s), %d ) --> ",arg1,arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "open(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         MAYBE_PRINTF("%d\n",res);
+         break;
+
+      case __NR_pipe: /* syscall 42 */
+         /* int pipe(int filedes[2]); */
+         MAYBE_PRINTF("pipe ( %p ) ...\n", arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "pipe(filedes)", arg1, 2*sizeof(int) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, 2*sizeof(int) );
+         if (VG_(clo_trace_syscalls) && !VG_(is_kerror)(res))
+            VG_(printf)("SYSCALL[%d]       pipe --> (rd %d, wr %d)\n", 
+                        VG_(getpid)(), 
+                        ((UInt*)arg1)[0], ((UInt*)arg1)[1] );
+         break;
+
+      case __NR_poll: /* syscall 168 */
+         /* struct pollfd {
+               int fd;           -- file descriptor
+               short events;     -- requested events
+               short revents;    -- returned events
+            };
+           int poll(struct pollfd *ufds, unsigned int nfds, 
+                                         int timeout) 
+         */
+         MAYBE_PRINTF("poll ( %p, %d, %d )\n",arg1,arg2,arg3);
+         /* In fact some parts of this struct should be readable too.
+            This should be fixed properly. */
+         SYSCALL_TRACK( pre_mem_write, tst, "poll(ufds)", 
+                           arg1, arg2 * sizeof(struct pollfd) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            Int i;
+            struct pollfd * arr = (struct pollfd *)arg1;
+            for (i = 0; i < arg2; i++)
+               VG_TRACK( post_mem_write, (Addr)(&arr[i].revents), sizeof(Short) );
+         }
+         break;
+ 
+      case __NR_readlink: /* syscall 85 */
+         /* int readlink(const char *path, char *buf, size_t bufsiz); */
+         MAYBE_PRINTF("readlink ( %p, %p, %d )\n", arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "readlink(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "readlink(buf)", arg2,arg3 );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            VG_TRACK( post_mem_write, arg2, res );
+         }
+         break;
+
+      case __NR_readv: { /* syscall 145 */
+         /* int readv(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("readv ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "readv(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_write, tst, "readv(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && res > 0) {
+            /* res holds the number of bytes read. */
+            for (i = 0; i < arg3; i++) {
+               Int nReadThisBuf = vec[i].iov_len;
+               if (nReadThisBuf > res) nReadThisBuf = res;
+               VG_TRACK( post_mem_write, (UInt)vec[i].iov_base, nReadThisBuf );
+               res -= nReadThisBuf;
+               if (res < 0) VG_(panic)("readv: res < 0");
+            }
+         }
+         break;
+      }
+
+      case __NR_rename: /* syscall 38 */
+         /* int rename(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("rename ( %p, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rename(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_rmdir: /* syscall 40 */
+         /* int rmdir(const char *pathname); */
+         MAYBE_PRINTF("rmdir ( %p )\n", arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "rmdir(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_sched_setparam: /* syscall 154 */
+         /* int sched_setparam(pid_t pid, const struct sched_param *p); */
+         MAYBE_PRINTF("sched_setparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_read, tst, "sched_setparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_getparam: /* syscall 155 */
+         /* int sched_getparam(pid_t pid, struct sched_param *p); */
+         MAYBE_PRINTF("sched_getparam ( %d, %p )\n", arg1, arg2 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sched_getparam(ptr)",
+                           arg2, sizeof(struct sched_param) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct sched_param) );
+         break;
+
+      case __NR_sched_yield: /* syscall 158 */
+         /* int sched_yield(void); */
+         MAYBE_PRINTF("sched_yield ()\n" );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_select: /* syscall 82 */
+         /* struct sel_arg_struct {
+              unsigned long n;
+              fd_set *inp, *outp, *exp;
+              struct timeval *tvp;
+            };
+            int old_select(struct sel_arg_struct *arg);
+         */
+         SYSCALL_TRACK( pre_mem_read, tst, "select(args)", arg1, 5*sizeof(UInt) );
+         {
+            UInt* arg_struct = (UInt*)arg1;
+            arg1 = arg_struct[0];
+            arg2 = arg_struct[1];
+            arg3 = arg_struct[2];
+            arg4 = arg_struct[3];
+            arg5 = arg_struct[4];
+
+            MAYBE_PRINTF("select ( %d, %p, %p, %p, %p )\n", 
+                         arg1,arg2,arg3,arg4,arg5);
+            if (arg2 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(readfds)", arg2, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg3 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(writefds)", arg3, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg4 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(exceptfds)", arg4, 
+                                          arg1/8 /* __FD_SETSIZE/8 */ );
+            if (arg5 != (Addr)NULL)
+               SYSCALL_TRACK( pre_mem_read, tst, "select(timeout)", arg5, 
+                                          sizeof(struct timeval) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setitimer: /* syscall 104 */
+         /* setitimer(int which, const struct itimerval *value,
+                                 struct itimerval *ovalue); */
+         MAYBE_PRINTF("setitimer ( %d, %p, %p )\n", arg1,arg2,arg3);
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_read,tst, "setitimer(value)", 
+                             arg2, sizeof(struct itimerval) );
+         if (arg3 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write,tst, "setitimer(ovalue)", 
+                             arg3, sizeof(struct itimerval));
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg3 != (Addr)NULL) {
+            VG_TRACK( post_mem_write,arg3, sizeof(struct itimerval));
+         }
+         break;
+
+#     if defined(__NR_setfsgid32)
+      case __NR_setfsgid32: /* syscall 216 */
+         /* int setfsgid(uid_t fsgid); */
+         MAYBE_PRINTF("setfsgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setgid32)
+      case __NR_setgid32: /* syscall 214 */
+#     endif
+      case __NR_setgid: /* syscall 46 */
+         /* int setgid(gid_t gid); */
+         MAYBE_PRINTF("setgid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setsid: /* syscall 66 */
+         /* pid_t setsid(void); */
+         MAYBE_PRINTF("setsid ()\n");
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setgroups32)
+      case __NR_setgroups32: /* syscall 206 */
+#     endif
+      case __NR_setgroups: /* syscall 81 */
+         /* int setgroups(size_t size, const gid_t *list); */
+         MAYBE_PRINTF("setgroups ( %d, %p )\n", arg1, arg2);
+         if (arg1 > 0)
+            SYSCALL_TRACK( pre_mem_read, tst, "setgroups(list)", arg2, 
+                               arg1 * sizeof(gid_t) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setpgid: /* syscall 57 */
+         /* int setpgid(pid_t pid, pid_t pgid); */
+         MAYBE_PRINTF("setpgid ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setregid32)
+      case __NR_setregid32: /* syscall 204 */
+         /* int setregid(gid_t rgid, gid_t egid); */
+         MAYBE_PRINTF("setregid32(?) ( %d, %d )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setresuid32)
+      case __NR_setresuid32: /* syscall 208 */
+         /* int setresuid(uid_t ruid, uid_t euid, uid_t suid); */
+         MAYBE_PRINTF("setresuid32(?) ( %d, %d, %d )\n", arg1, arg2, arg3);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+#     endif
+
+#     if defined(__NR_setreuid32)
+      case __NR_setreuid32: /* syscall 203 */
+#     endif
+      case __NR_setreuid: /* syscall 70 */
+         /* int setreuid(uid_t ruid, uid_t euid); */
+         MAYBE_PRINTF("setreuid ( 0x%x, 0x%x )\n", arg1, arg2);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_setrlimit: /* syscall 75 */
+         /* int setrlimit (int resource, const struct rlimit *rlim); */
+         MAYBE_PRINTF("setrlimit ( %d, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read, tst, "setrlimit(rlim)", arg2, sizeof(struct rlimit) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+#     if defined(__NR_setuid32)
+      case __NR_setuid32: /* syscall 213 */
+#     endif
+      case __NR_setuid: /* syscall 23 */
+         /* int setuid(uid_t uid); */
+         MAYBE_PRINTF("setuid ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_socketcall: /* syscall 102 */
+         /* int socketcall(int call, unsigned long *args); */
+         MAYBE_PRINTF("socketcall ( %d, %p )\n",arg1,arg2);
+         switch (arg1 /* request */) {
+
+            case SYS_SOCKETPAIR:
+               /* int socketpair(int d, int type, int protocol, int sv[2]); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socketpair(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.socketpair(sv)", 
+                                 ((UInt*)arg2)[3], 2*sizeof(int) );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[3], 2*sizeof(int) );
+               break;
+
+            case SYS_SOCKET:
+               /* int socket(int domain, int type, int protocol); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.socket(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_BIND:
+               /* int bind(int sockfd, struct sockaddr *my_addr, 
+                           int addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.bind(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               pre_mem_read_sockaddr( tst, "socketcall.bind(my_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+               
+            case SYS_LISTEN:
+               /* int listen(int s, int backlog); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.listen(args)", 
+                                 arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_ACCEPT: {
+               /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.accept(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               {
+               Addr addr_p     = ((UInt*)arg2)[1];
+               Addr addrlen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, addr_p, addrlen_p,
+                                       "socketcall.accept(addr)",
+                                       "socketcall.accept(addrlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, addr_p, addrlen_p,
+                                        "socketcall.accept(addrlen_out)" );
+               }
+               break;
+            }
+
+            case SYS_SENDTO:
+               /* int sendto(int s, const void *msg, int len, 
+                             unsigned int flags, 
+                             const struct sockaddr *to, int tolen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(args)", arg2, 
+                                 6*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.sendto(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               pre_mem_read_sockaddr( tst, "socketcall.sendto(to.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[4]), ((UInt*)arg2)[5]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SEND:
+               /* int send(int s, const void *msg, size_t len, int flags); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(args)", arg2,
+                                 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.send(msg)",
+                                 ((UInt*)arg2)[1], /* msg */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_RECVFROM:
+               /* int recvfrom(int s, void *buf, int len, unsigned int flags,
+                               struct sockaddr *from, int *fromlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recvfrom(args)", 
+                                 arg2, 6*sizeof(Addr) );
+               {
+               Addr buf_p      = ((UInt*)arg2)[1];
+               Int  len        = ((UInt*)arg2)[2];
+               Addr from_p     = ((UInt*)arg2)[4];
+               Addr fromlen_p  = ((UInt*)arg2)[5];
+
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recvfrom(buf)", 
+                                             buf_p, len );
+               buf_and_len_pre_check ( tst, from_p, fromlen_p, 
+                                       "socketcall.recvfrom(from)",
+                                       "socketcall.recvfrom(fromlen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, from_p, fromlen_p,
+                                        "socketcall.recvfrom(fromlen_out)" );
+               if (!VG_(is_kerror)(res))
+                  VG_TRACK( post_mem_write, buf_p, len );
+               }
+               break;
+
+            case SYS_RECV:
+               /* int recv(int s, void *buf, int len, unsigned int flags); */
+               /* man 2 recv says:
+               The  recv call is normally used only on a connected socket
+               (see connect(2)) and is identical to recvfrom with a  NULL
+               from parameter.
+               */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.recv(args)", 
+                                 arg2, 4*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_write, tst, "socketcall.recv(buf)", 
+                                 ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               if (!VG_(is_kerror)(res) && res >= 0 
+                                   && ((UInt*)arg2)[1] != (UInt)NULL) {
+                  VG_TRACK( post_mem_write, ((UInt*)arg2)[1], /* buf */
+                                 ((UInt*)arg2)[2]  /* len */ );
+               }
+               break;
+
+            case SYS_CONNECT:
+               /* int connect(int sockfd, 
+                              struct sockaddr *serv_addr, int addrlen ); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(args)", 
+                                 arg2, 3*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.connect(serv_addr.sa_family)",
+                                 ((UInt*)arg2)[1], /* serv_addr */
+                                 sizeof (sa_family_t));
+               pre_mem_read_sockaddr( tst,
+                  "socketcall.connect(serv_addr.%s)",
+                  (struct sockaddr *) (((UInt*)arg2)[1]), ((UInt*)arg2)[2]);
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SETSOCKOPT:
+               /* int setsockopt(int s, int level, int optname, 
+                                 const void *optval, int optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.setsockopt(optval)",
+                                 ((UInt*)arg2)[3], /* optval */
+                                 ((UInt*)arg2)[4]  /* optlen */ );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_GETSOCKOPT:
+               /* int getsockopt(int s, int level, int optname, 
+                                 void *optval, socklen_t *optlen); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockopt(args)", 
+                                 arg2, 5*sizeof(Addr) );
+               {
+               Addr optval_p  = ((UInt*)arg2)[3];
+               Addr optlen_p  = ((UInt*)arg2)[4];
+               /* vg_assert(sizeof(socklen_t) == sizeof(UInt)); */
+               buf_and_len_pre_check ( tst, optval_p, optlen_p,
+                                       "socketcall.getsockopt(optval)",
+                                       "socketcall.getsockopt(optlen)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, optval_p, optlen_p,
+                                        "socketcall.getsockopt(optlen_out)" );
+               }
+               break;
+
+            case SYS_GETSOCKNAME:
+               /* int getsockname(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getsockname(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getsockname(name)",
+                                       "socketcall.getsockname(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getsockname(namelen_out)" );
+               }
+               break;
+
+            case SYS_GETPEERNAME:
+               /* int getpeername(int s, struct sockaddr* name, int* namelen) */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.getpeername(args)",
+                                            arg2, 3*sizeof(Addr) );
+               {
+               Addr name_p     = ((UInt*)arg2)[1];
+               Addr namelen_p  = ((UInt*)arg2)[2];
+               buf_and_len_pre_check ( tst, name_p, namelen_p,
+                                       "socketcall.getpeername(name)",
+                                       "socketcall.getpeername(namelen_in)" );
+               KERNEL_DO_SYSCALL(tid,res);
+               buf_and_len_post_check ( tst, res, name_p, namelen_p,
+                                        "socketcall.getpeername(namelen_out)" );
+               }
+               break;
+
+            case SYS_SHUTDOWN:
+               /* int shutdown(int s, int how); */
+               SYSCALL_TRACK( pre_mem_read, tst, "socketcall.shutdown(args)", 
+                                            arg2, 2*sizeof(Addr) );
+               KERNEL_DO_SYSCALL(tid,res);
+               break;
+
+            case SYS_SENDMSG:
+               {
+                  /* int sendmsg(int s, const struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.sendmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_read_sendmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+                  break;
+               }
+
+            case SYS_RECVMSG:
+               {
+                  /* int recvmsg(int s, struct msghdr *msg, int flags); */
+
+                  /* this causes warnings, and I don't get why. glibc bug?
+                   * (after all it's glibc providing the arguments array)
+                  SYSCALL_TRACK( pre_mem_read, "socketcall.recvmsg(args)", 
+                                     arg2, 3*sizeof(Addr) );
+                  */
+
+                  struct msghdr *msg = (struct msghdr *)((UInt *)arg2)[ 1 ];
+                  msghdr_foreachfield ( tst, msg, pre_mem_write_recvmsg );
+
+                  KERNEL_DO_SYSCALL(tid,res);
+
+                  if ( !VG_(is_kerror)( res ) )
+                     msghdr_foreachfield( tst, msg, post_mem_write_recvmsg );
+
+                  break;
+               }
+
+            default:
+               VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%x",arg1);
+               VG_(panic)("... bye!\n");
+               break; /*NOTREACHED*/
+         }
+         break;
+
+      case __NR_stat: /* syscall 106 */
+         /* int stat(const char *file_name, struct stat *buf); */
+         MAYBE_PRINTF("stat ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct stat) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat) );
+         break;
+
+      case __NR_statfs: /* syscall 99 */
+         /* int statfs(const char *path, struct statfs *buf); */
+         MAYBE_PRINTF("statfs ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "statfs(path)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat(buf)", arg2, sizeof(struct statfs) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct statfs) );
+         break;
+
+      case __NR_symlink: /* syscall 83 */
+         /* int symlink(const char *oldpath, const char *newpath); */
+         MAYBE_PRINTF("symlink ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(oldpath)", arg1 );
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "symlink(newpath)", arg2 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break; 
+
+#     if defined(__NR_stat64)
+      case __NR_stat64: /* syscall 195 */
+         /* int stat64(const char *file_name, struct stat64 *buf); */
+         MAYBE_PRINTF("stat64 ( %p, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "stat64(file_name)", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "stat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+#     if defined(__NR_fstat64)
+      case __NR_fstat64: /* syscall 197 */
+         /* int fstat64(int filedes, struct stat64 *buf); */
+         MAYBE_PRINTF("fstat64 ( %d, %p )\n",arg1,arg2);
+         SYSCALL_TRACK( pre_mem_write, tst, "fstat64(buf)", arg2, sizeof(struct stat64) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg2, sizeof(struct stat64) );
+         break;
+#     endif
+
+      case __NR_sysinfo: /* syscall 116 */
+         /* int sysinfo(struct sysinfo *info); */
+         MAYBE_PRINTF("sysinfo ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "sysinfo(info)", arg1, sizeof(struct sysinfo) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res))
+            VG_TRACK( post_mem_write, arg1, sizeof(struct sysinfo) );
+         break;
+
+      case __NR_time: /* syscall 13 */
+         /* time_t time(time_t *t); */
+         MAYBE_PRINTF("time ( %p )\n",arg1);
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_write, tst, "time", arg1, sizeof(time_t) );
+         }
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(time_t) );
+         }
+         break;
+
+      case __NR_times: /* syscall 43 */
+         /* clock_t times(struct tms *buf); */
+         MAYBE_PRINTF("times ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "times(buf)", arg1, sizeof(struct tms) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct tms) );
+         }
+         break;
+
+      case __NR_truncate: /* syscall 92 */
+         /* int truncate(const char *path, size_t length); */
+         MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "truncate(path)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_umask: /* syscall 60 */
+         /* mode_t umask(mode_t mask); */
+         MAYBE_PRINTF("umask ( %d )\n", arg1);
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_unlink: /* syscall 10 */
+         /* int unlink(const char *pathname) */
+         MAYBE_PRINTF("ulink ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "unlink(pathname)", arg1 );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_uname: /* syscall 122 */
+         /* int uname(struct utsname *buf); */
+         MAYBE_PRINTF("uname ( %p )\n",arg1);
+         SYSCALL_TRACK( pre_mem_write, tst, "uname(buf)", arg1, sizeof(struct utsname) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res) && arg1 != (UInt)NULL) {
+            VG_TRACK( post_mem_write, arg1, sizeof(struct utsname) );
+         }
+         break;
+
+      case __NR_utime: /* syscall 30 */
+         /* int utime(const char *filename, struct utimbuf *buf); */
+         MAYBE_PRINTF("utime ( %p, %p )\n", arg1,arg2);
+         SYSCALL_TRACK( pre_mem_read_asciiz, tst, "utime(filename)", arg1 );
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "utime(buf)", arg2, 
+                                                 sizeof(struct utimbuf) );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+
+      case __NR_wait4: /* syscall 114 */
+         /* pid_t wait4(pid_t pid, int *status, int options,
+                        struct rusage *rusage) */
+         MAYBE_PRINTF("wait4 ( %d, %p, %d, %p )\n",
+                      arg1,arg2,arg3,arg4);
+         if (arg2 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(status)", arg2, sizeof(int) );
+         if (arg4 != (Addr)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "wait4(rusage)", arg4, 
+                              sizeof(struct rusage) );
+         KERNEL_DO_SYSCALL(tid,res);
+         if (!VG_(is_kerror)(res)) {
+            if (arg2 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg2, sizeof(int) );
+            if (arg4 != (Addr)NULL)
+               VG_TRACK( post_mem_write, arg4, sizeof(struct rusage) );
+         }
+         break;
+
+      case __NR_writev: { /* syscall 146 */
+         /* int writev(int fd, const struct iovec * vector, size_t count); */
+         UInt i;
+         struct iovec * vec;
+         MAYBE_PRINTF("writev ( %d, %p, %d )\n",arg1,arg2,arg3);
+         SYSCALL_TRACK( pre_mem_read, tst, "writev(vector)", 
+                           arg2, arg3 * sizeof(struct iovec) );
+         /* ToDo: don't do any of the following if the vector is invalid */
+         vec = (struct iovec *)arg2;
+         for (i = 0; i < arg3; i++)
+            SYSCALL_TRACK( pre_mem_read, tst, "writev(vector[...])",
+                              (UInt)vec[i].iov_base,vec[i].iov_len );
+         KERNEL_DO_SYSCALL(tid,res);
+         break;
+      }
+
+      /*-------------------------- SIGNALS --------------------------*/
+
+      /* Normally set to 1, so that Valgrind's signal-simulation machinery
+         is engaged.  Sometimes useful to disable (set to 0), for
+         debugging purposes, to make clients more deterministic. */
+#     define SIGNAL_SIMULATION 1
+
+      case __NR_sigaltstack: /* syscall 186 */
+         /* int sigaltstack(const stack_t *ss, stack_t *oss); */
+         MAYBE_PRINTF("sigaltstack ( %p, %p )\n",arg1,arg2);
+         if (arg1 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaltstack(ss)", 
+                              arg1, sizeof(vki_kstack_t) );
+         }
+         if (arg2 != (UInt)NULL) {
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaltstack(ss)", 
+                              arg1, sizeof(vki_kstack_t) );
+         }
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigaltstack) (tid);
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg2 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg2, sizeof(vki_kstack_t));
+         break;
+
+      case __NR_rt_sigaction:
+      case __NR_sigaction:
+         /* int sigaction(int signum, struct k_sigaction *act, 
+                                      struct k_sigaction *oldact); */
+         MAYBE_PRINTF("sigaction ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigaction(act)", 
+                              arg2, sizeof(vki_ksigaction));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigaction(oldact)", 
+                              arg3, sizeof(vki_ksigaction));
+         /* We do this one ourselves! */
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigaction)(tid);
+         res = tst->m_eax;
+#        else
+         /* debugging signals; when we don't handle them. */
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigaction));
+         break;
+
+      case __NR_rt_sigprocmask:
+      case __NR_sigprocmask:
+         /* int sigprocmask(int how, k_sigset_t *set, 
+                                     k_sigset_t *oldset); */
+         MAYBE_PRINTF("sigprocmask ( %d, %p, %p )\n",arg1,arg2,arg3);
+         if (arg2 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_read, tst, "sigprocmask(set)", 
+                              arg2, sizeof(vki_ksigset_t));
+         if (arg3 != (UInt)NULL)
+            SYSCALL_TRACK( pre_mem_write, tst, "sigprocmask(oldset)", 
+                              arg3, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do__NR_sigprocmask) ( tid, 
+                                   arg1 /*how*/, 
+                                   (vki_ksigset_t*) arg2,
+                                   (vki_ksigset_t*) arg3 );
+         res = tst->m_eax;
+#        else
+         KERNEL_DO_SYSCALL(tid,res);
+#        endif
+         if (!VG_(is_kerror)(res) && res == 0 && arg3 != (UInt)NULL)
+            VG_TRACK( post_mem_write, arg3, sizeof(vki_ksigset_t));
+         break;
+      case __NR_sigpending: /* syscall 73 */
+#     if defined(__NR_rt_sigpending)
+      case __NR_rt_sigpending: /* syscall 176 */
+#     endif
+         /* int sigpending( sigset_t *set ) ; */
+         MAYBE_PRINTF( "sigpending ( %p )\n", arg1 );
+         SYSCALL_TRACK( pre_mem_write, tst, "sigpending(set)", 
+                           arg1, sizeof(vki_ksigset_t));
+#        if SIGNAL_SIMULATION
+         VG_(do_sigpending)( tid, (vki_ksigset_t*)arg1 );
+         res = 0;
+	 SET_EAX(tid, res);
+#        else
+         KERNEL_DO_SYSCALL(tid, res);
+#        endif
+         if ( !VG_( is_kerror )( res ) && res == 0 )
+            VG_TRACK( post_mem_write, arg1, sizeof( vki_ksigset_t ) ) ;
+         break ;
+
+      default:
+         VG_(message)
+            (Vg_DebugMsg,"FATAL: unhandled syscall: %d",syscallno);
+         VG_(message)
+            (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
+         VG_(message)
+            (Vg_DebugMsg,"Read the file README_MISSING_SYSCALL_OR_IOCTL.");
+         VG_(unimplemented)("no wrapper for the above system call");
+         vg_assert(3+3 == 7);
+         break; /*NOTREACHED*/
+   }
+
+   /* { void zzzmemscan(void); zzzmemscan(); } */
+
+   /* Do any post-syscall actions */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/False);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+
+/* Perform pre-actions for a blocking syscall, but do not do the
+   syscall itself.
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+
+   Returns the opaque value produced by the skin's SK_(pre_syscall)
+   hook (0 if no syscall_wrapper is registered); the caller must pass
+   it on to VG_(post_known_blocking_syscall) after the syscall runs.
+*/
+void* VG_(pre_known_blocking_syscall) ( ThreadId tid, Int syscallno )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+   void*        pre_res = 0;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   /* Fetch the syscall arguments from the client thread's saved
+      registers (x86 Linux syscall convention: ebx, ecx, edx). */
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   /* Give the skin a chance to record per-syscall state first. */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      pre_res = SK_(pre_syscall)(tid, syscallno, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   /* Only read and write are expected as known-blocking syscalls;
+      anything else reaching here is a caller bug, hence the panic. */
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       read ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         /* The kernel will write up to arg3 bytes into buf (arg2). */
+         SYSCALL_TRACK( pre_mem_write, tst, "read(buf)", arg2, arg3 );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL--PRE[%d,%d]       write ( %d, %p, %d )\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3);
+         /* The kernel will read arg3 bytes from buf (arg2). */
+         SYSCALL_TRACK( pre_mem_read, tst, "write(buf)", arg2, arg3 );
+         break;
+
+      default:
+         VG_(printf)("pre_known_blocking_syscall: unexpected %d\n", syscallno);
+         VG_(panic)("pre_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+   VGP_POPCC(VgpCoreSysWrap);
+
+   return pre_res;      /* 0 if SK_(pre_syscall)() not called */
+}
+
+
+/* Perform post-actions for a blocking syscall, but do not do the
+   syscall itself.  
+
+   Because %eax is used both for the syscall number before the call
+   and the result value afterwards, we can't reliably use it to get
+   the syscall number.  So the caller has to pass it explicitly.  
+
+   'pre_res' is whatever VG_(pre_known_blocking_syscall) returned for
+   this call; 'res' is the syscall's result value.
+*/
+void VG_(post_known_blocking_syscall) ( ThreadId tid,
+                                        Int syscallno,
+                                        void* pre_res,
+                                        Int res )
+{
+   ThreadState* tst;
+   UInt         arg1, arg2, arg3;
+
+   VGP_PUSHCC(VgpCoreSysWrap);
+
+   /* Re-fetch the syscall arguments from the client thread's saved
+      registers (ebx, ecx, edx), as in the pre-handler. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst              = & VG_(threads)[tid];
+   arg1             = tst->m_ebx;
+   arg2             = tst->m_ecx;
+   arg3             = tst->m_edx;
+   /*
+   arg4             = tst->m_esi;
+   arg5             = tst->m_edi;
+   */
+
+   /* Only read and write are expected; anything else is a caller
+      bug, hence the panic in the default branch. */
+   switch (syscallno) {
+
+      case __NR_read: /* syscall 3 */
+         /* size_t read(int fd, void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       read ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         /* On success, the kernel filled in res bytes of buf. */
+         if (!VG_(is_kerror)(res) && res > 0)
+            VG_TRACK( post_mem_write, arg2, res );
+         break;
+
+      case __NR_write: /* syscall 4 */
+         /* size_t write(int fd, const void *buf, size_t count); */
+         MAYBE_PRINTF(
+               "SYSCALL-POST[%d,%d]       write ( %d, %p, %d ) --> %d\n", 
+               VG_(getpid)(), tid,
+               arg1, arg2, arg3, res);
+         /* write leaves no client memory to mark as written. */
+         break;
+
+      default:
+         VG_(printf)("post_known_blocking_syscall: unexpected %d\n", 
+                     syscallno);
+         VG_(panic)("post_known_blocking_syscall");
+         /*NOTREACHED*/
+         break;
+   }
+
+   /* Let the skin observe the completed syscall and its result. */
+   if (VG_(needs).syscall_wrapper) {
+      VGP_PUSHCC(VgpSkinSysWrap);
+      SK_(post_syscall)(tid, syscallno, pre_res, res, /*isBlocking*/True);
+      VGP_POPCC(VgpSkinSysWrap);
+   }
+
+   VGP_POPCC(VgpCoreSysWrap);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                         vg_syscall_mem.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/vg_to_ucode.c b/vg_to_ucode.c
index 179c059..0447d8f 100644
--- a/vg_to_ucode.c
+++ b/vg_to_ucode.c
@@ -26,7 +26,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
@@ -40,12 +40,12 @@
 #define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
 #define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
 #define newTemp   VG_(getNewTemp)
 #define uLiteral  VG_(setLiteralField)
 
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Here so it can be inlined everywhere.                ---*/
@@ -66,21 +66,6 @@
    return SHADOW(t);
 }
 
-/* Handy predicates. */
-#define SMC_IF_SOME(cb)                              \
-   do {                                              \
-      if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) {   \
-           LAST_UINSTR((cb)).smc_check = True;       \
-      }                                              \
-   } while (0)
-
-#define SMC_IF_ALL(cb)                               \
-   do {                                              \
-      if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) {    \
-         LAST_UINSTR((cb)).smc_check = True;         \
-      }                                              \
-   } while (0)
-
 
 /*------------------------------------------------------------*/
 /*--- Helper bits and pieces for deconstructing the        ---*/
@@ -818,7 +803,6 @@
       }
       if (keep) {
          uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-         SMC_IF_ALL(cb);
       }
       if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
@@ -916,7 +900,6 @@
       Int  tmpv = newTemp(cb);
       uInstr2(cb, GET,   size, ArchReg, gregOfRM(rm), TempReg, tmpv);
       uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), 
                            nameIReg(size,gregOfRM(rm)), dis_buf);
       return HI8(pair)+eip0;
@@ -1113,7 +1096,6 @@
       }
       if (gregOfRM(modrm) < 7) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("%s%c $0x%x, %s\n",
@@ -1201,7 +1183,6 @@
       }
       setFlagsFromUOpcode(cb, uopc);
       uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) {
          if (orig_src_tag == Literal)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1321,7 +1302,6 @@
       /* Dump the result back, if non-BT. */
       if (gregOfRM(modrm) != 4 /* BT */) {
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1);
-         SMC_IF_ALL(cb);
       }
       if (dis)
             VG_(printf)("%s%c $0x%x, %s\n",
@@ -1512,7 +1492,6 @@
             uInstr1(cb, NOT, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NOT);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("not%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1520,7 +1499,6 @@
             uInstr1(cb, NEG, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, NEG);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis)
                VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf);
             break;
@@ -1595,13 +1573,11 @@
             uInstr1(cb, INC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, 1, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1650,7 +1626,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	    uLiteral(cb, eip+1);
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1680,13 +1655,11 @@
             uInstr1(cb, INC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, INC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 1: /* DEC */
             uInstr1(cb, DEC, sz, TempReg, t1);
             setFlagsFromUOpcode(cb, DEC);
             uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             break;
          case 2: /* call Ev */
             t3 = newTemp(cb); t4 = newTemp(cb);
@@ -1697,7 +1670,6 @@
             uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t4);
 	         uLiteral(cb, eip+HI8(pair));
             uInstr2(cb, STORE, 4, TempReg, t4,    TempReg, t3);
-            SMC_IF_ALL(cb);
             uInstr1(cb, JMP,   0, TempReg, t1);
             uCond(cb, CondAlways);
             LAST_UINSTR(cb).jmpkind = JmpCall;
@@ -1715,7 +1687,6 @@
 	    uLiteral(cb, sz);
             uInstr2(cb, PUT,    4, TempReg, t3,    ArchReg, R_ESP);
             uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t3);
-            SMC_IF_ALL(cb);
             break;
          default: 
             VG_(printf)(
@@ -1864,7 +1835,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -1912,7 +1882,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -1996,7 +1965,6 @@
 
    uInstr2(cb, LOAD,  sz, TempReg, ts,    TempReg, tv);
    uInstr2(cb, STORE, sz, TempReg, tv,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, tv);
@@ -2032,7 +2000,6 @@
    uInstr2(cb, GET,   sz, ArchReg, R_EAX, TempReg, ta);
    uInstr2(cb, GET,    4, ArchReg, R_EDI, TempReg, td);
    uInstr2(cb, STORE, sz, TempReg, ta,    TempReg, td);
-   SMC_IF_SOME(cb);
 
    uInstr0(cb, CALLM_S, 0);
    uInstr2(cb, MOV,   4, Literal, 0,     TempReg, ta);
@@ -2269,7 +2236,6 @@
                Lit16, 
                (((UShort)first_byte) << 8) | ((UShort)second_byte),
                TempReg, ta);
-   if (is_write) SMC_IF_ALL(cb);
    if (dis) {
       if (is_write)
          VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n",
@@ -2485,7 +2451,13 @@
                return dis_fpu_mem(cb, 8, rd, eip, first_byte); 
             case 2: /* FST double-real */
             case 3: /* FSTP double-real */
-               return dis_fpu_mem(cb, 8, wr, eip, first_byte); 
+               return dis_fpu_mem(cb, 8, wr, eip, first_byte);
+            case 4: /* FRSTOR */
+               return dis_fpu_mem(cb, 108, rd, eip, first_byte);
+            case 6: /* FSAVE */
+               return dis_fpu_mem(cb, 108, wr, eip, first_byte);
+            case 7: /* FSTSW */
+               return dis_fpu_mem(cb, 2, wr, eip, first_byte);
             default: 
                goto unhandled;
          }
@@ -2585,7 +2557,6 @@
       uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty);
       uInstr1(cb, POP,   sz, TempReg, t);
       uInstr2(cb, STORE, sz, TempReg, t,      TempReg, ta);
-      SMC_IF_ALL(cb);
       if (dis)
          VG_(printf)("shld%c %%cl, %s, %s\n",
                      nameISize(sz), nameIReg(sz, gregOfRM(modrm)), 
@@ -3010,7 +2981,6 @@
       uInstr2(cb,  ADD, sz, TempReg, tmpd, TempReg, tmpt);
       setFlagsFromUOpcode(cb, ADD);
       uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa);
-      SMC_IF_SOME(cb);
       uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm));
       if (dis)
          VG_(printf)("xadd%c %s, %s\n", nameISize(sz), 
@@ -3167,7 +3137,6 @@
          uInstr2(cb, MOV,   4, Literal, 0,     TempReg, t2);
 	 uLiteral(cb, eip);
          uInstr2(cb, STORE, 4, TempReg, t2,    TempReg, t1);
-         SMC_IF_ALL(cb);
          uInstr1(cb, JMP,   0, Literal, 0);
 	 uLiteral(cb, d32);
          uCond(cb, CondAlways);
@@ -3472,7 +3441,6 @@
       uInstr2(cb, MOV,    4, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t1,    TempReg, t2);
-      SMC_IF_SOME(cb);
       if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), 
                            nameIReg(sz,R_EAX), d32);
       break;
@@ -3535,7 +3503,6 @@
          uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1);
 	 uLiteral(cb, d32);
          uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2);
-         SMC_IF_SOME(cb);
          if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
       }
       break;
@@ -3556,6 +3523,10 @@
       eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" );
       break;
 
+   case 0x15: /* ADC Iv, eAX */
+      eip = dis_op_imm_A(cb, sz, ADC, True, eip, "adc" );
+      break;
+
    case 0x1C: /* SBB Ib, AL */
       eip = dis_op_imm_A(cb, 1, SBB, True, eip, "sbb" );
       break;
@@ -3718,40 +3689,7 @@
    case 0x5D: /* POP eBP */
    case 0x5E: /* POP eSI */
    case 0x5F: /* POP eDI */
-    { Int   n_pops;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_pop_case;
-      if (VG_(clo_cachesim)) goto normal_pop_case;
-      /* eip points at first pop insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x58 || ch > 0x5F || ch == 0x5C) break;
-         eipE++;
-      }
-      n_pops = eipE - eipS + 1;
-      if (0 && n_pops > 1) VG_(printf)("%d pops\n", n_pops);
-      t1 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-	 uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t3);
-         uInstr2(cb, PUT,  4, TempReg, t3, ArchReg, ch-0x58);
-         uInstr2(cb, ADD,  4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("popl %s\n", nameIReg(4,ch-0x58));
-      }
-      uInstr2(cb, PUT,    4, TempReg, t1,       ArchReg, R_ESP);
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x5C: /* POP eSP */
-   normal_pop_case:
       t1 = newTemp(cb); t2 = newTemp(cb);
       uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t2);
       uInstr2(cb, LOAD,  sz, TempReg, t2,       TempReg, t1);
@@ -3863,43 +3801,7 @@
    case 0x55: /* PUSH eBP */
    case 0x56: /* PUSH eSI */
    case 0x57: /* PUSH eDI */
-    { Int   n_pushes;
-      Addr  eipS, eipE;
-      UChar ch;
-      if (sz != 4)           goto normal_push_case;
-      if (VG_(clo_cachesim)) goto normal_push_case;
-      /* eip points at first push insn + 1.  Make eipS and eipE
-         bracket the sequence. */
-      eipE = eipS = eip - 1;
-      while (True) { 
-         ch = getUChar(eipE+1);
-         if (ch < 0x50 || ch > 0x57 || ch == 0x54) break;
-         eipE++;
-      }
-      n_pushes = eipE - eipS + 1;
-      if (0 && n_pushes > 1) VG_(printf)("%d pushes\n", n_pushes);
-      t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb);
-      uInstr2(cb, GET,    4, ArchReg, R_ESP,    TempReg, t1);
-      uInstr2(cb, MOV,    4, TempReg, t1,       TempReg, t2);
-      uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t2);
-      uLiteral(cb, 4 * n_pushes);
-      uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
-      for (; eipS <= eipE; eipS++) {
-         ch = getUChar(eipS);
-         uInstr2(cb, SUB,    4, Literal, 0,        TempReg, t1);
-         uLiteral(cb, 4);
-         uInstr2(cb, GET, 4, ArchReg, ch-0x50, TempReg, t3);
-	 uInstr2(cb, STORE, 4, TempReg, t3, TempReg, t1);
-         SMC_IF_ALL(cb);
-         if (dis) 
-            VG_(printf)("pushl %s\n", nameIReg(4,ch-0x50));
-      }
-      eip = eipE + 1;
-      break;
-    }
-
    case 0x54: /* PUSH eSP */
-   normal_push_case:
       /* This is the Right Way, in that the value to be pushed is
          established before %esp is changed, so that pushl %esp
          correctly pushes the old value. */
@@ -3911,7 +3813,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
       break;
@@ -3931,7 +3832,6 @@
       uInstr2(cb, MOV,   sz, Literal, 0,     TempReg, t2);
       uLiteral(cb, d32);
       uInstr2(cb, STORE, sz, TempReg, t2,    TempReg, t1);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("push%c $0x%x\n", nameISize(sz), d32);
       break;
@@ -3948,7 +3848,6 @@
       uLiteral(cb, sz);
       uInstr2(cb, PUT,    4, TempReg, t2,       ArchReg, R_ESP);
       uInstr2(cb, STORE, sz, TempReg, t1,       TempReg, t2);
-      SMC_IF_ALL(cb);
       if (dis) 
          VG_(printf)("pushf%c\n", nameISize(sz));
       break;
@@ -3980,20 +3879,17 @@
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       /* Push old value of %esp */
       uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
       uLiteral(cb, sz);
       uInstr2(cb, STORE, sz, TempReg,  t3, TempReg, t2);
-      SMC_IF_ALL(cb);
       /* Do %ebp, %esi, %edi */
       for (reg = 5; reg <= 7; reg++) {
          uInstr2(cb, GET,   sz, ArchReg, reg, TempReg, t1);
          uInstr2(cb, SUB,    4, Literal,   0, TempReg, t2);
          uLiteral(cb, sz);
          uInstr2(cb, STORE, sz, TempReg,  t1, TempReg, t2);
-         SMC_IF_ALL(cb);
       }
       if (dis)
          VG_(printf)("pusha%c\n", nameISize(sz));
@@ -4149,7 +4045,6 @@
          uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1);
          uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2);
          uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3);
-         SMC_IF_SOME(cb);
          uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm));
          eip += HI8(pair);
          if (dis)
@@ -4231,6 +4126,14 @@
       eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 );
       break;
 
+   case 0xD2: /* Grp2 CL,Eb */
+      modrm = getUChar(eip);
+      am_sz = lengthAMode(eip);
+      d_sz  = 0;
+      sz    = 1;
+      eip   = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX );
+      break;
+
    case 0xD3: /* Grp2 CL,Ev */
       modrm = getUChar(eip);
       am_sz = lengthAMode(eip);
@@ -4499,7 +4402,6 @@
             uCond(cb, (Condcode)(opc-0x90));
             uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty);
             uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2);
-            SMC_IF_ALL(cb);
             if (dis) VG_(printf)("set%s %s\n", 
                                  VG_(nameCondcode)(opc-0x90), 
                                  dis_buf);
@@ -4568,10 +4470,11 @@
    if (dis)
       VG_(printf)("\n");
    for (; first_uinstr < cb->used; first_uinstr++) {
-      Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]);
-      if (dis || !sane) 
-         VG_(ppUInstr)(sane ? first_uinstr : -1,
-                       &cb->instrs[first_uinstr]);
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[first_uinstr]);
+      if (dis) 
+         VG_(ppUInstr)(first_uinstr, &cb->instrs[first_uinstr]);
+      else if (!sane)
+         VG_(upUInstr)(-1, &cb->instrs[first_uinstr]);
       vg_assert(sane);
    }
 
@@ -4588,28 +4491,17 @@
    Addr eip   = eip0;
    Bool isEnd = False;
    Bool block_sane;
-   Int INCEIP_allowed_lag = 4;
    Int delta = 0;
 
-   if (dis) VG_(printf)("\n");
+   if (dis) VG_(printf)("Original x86 code to UCode:\n\n");
 
-   /* When cache simulating, to ensure cache misses are attributed to the
-    * correct line we ensure EIP is always correct.   This is done by:
+   /* After every x86 instruction do an INCEIP, except for the final one
+    * in the basic block.  For them we patch in the x86 instruction size 
+    * into the `extra4b' field of the basic-block-ending JMP. 
     *
-    * a) Using eager INCEIP updating to cope with all instructions except those
-    *    at the end of a basic block.
-    *
-    * b) Patching in the size of the original x86 instr in the `extra4b' field
-    *    of JMPs at the end of a basic block.  Two cases:
-    *       - Jcond followed by Juncond:  patch the Jcond
-    *       - Juncond alone:              patch the Juncond
-    *
-    * See vg_cachesim_instrument() for how this is used. 
+    * The INCEIPs and JMP.extra4b fields allows a skin to track x86
+    * instruction sizes, important for some skins (eg. cache simulation).
     */
-   if (VG_(clo_cachesim)) {
-       INCEIP_allowed_lag = 0;
-   }
-
    if (VG_(clo_single_step)) {
       eip = disInstr ( cb, eip, &isEnd );
 
@@ -4620,15 +4512,17 @@
          uInstr1(cb, JMP, 0, Literal, 0);
          uLiteral(cb, eip);
          uCond(cb, CondAlways);
+         /* Print added JMP */
          if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
       }
+      if (dis) VG_(printf)("\n");
       delta = eip - eip0;
 
    } else {
       Addr eip2;
       while (!isEnd) {
          eip2 = disInstr ( cb, eip, &isEnd );
-         delta += (eip2 - eip);
+         delta = (eip2 - eip);
          eip = eip2;
          /* Split up giant basic blocks into pieces, so the
             translations fall within 64k. */
@@ -4639,27 +4533,23 @@
             uInstr1(cb, JMP, 0, Literal, 0);
             uLiteral(cb, eip);
             uCond(cb, CondAlways);
+            /* Print added JMP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
             isEnd = True;
 
-         } else if (delta > INCEIP_allowed_lag && !isEnd) {
+         } else if (!isEnd) {
             uInstr1(cb, INCEIP, 0, Lit16, delta);
+            /* Print added INCEIP */
             if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]);
-            delta = 0;
          }
          if (dis) VG_(printf)("\n");
       }
    }
-   if (VG_(clo_cachesim)) {
-      /* Patch instruction size into earliest JMP. */
-      if (cb->used >= 2 && JMP == cb->instrs[cb->used - 2].opcode) {
-         cb->instrs[cb->used - 2].extra4b = delta;
-      } else {
-         LAST_UINSTR(cb).extra4b = delta;
-      }
-   }
 
-   block_sane = VG_(saneUCodeBlock)(cb);
+   /* Patch instruction size into final JMP. */
+   LAST_UINSTR(cb).extra4b = delta;
+
+   block_sane = VG_(saneUCodeBlockCalls)(cb);
    if (!block_sane) {
       VG_(ppUCodeBlock)(cb, "block failing sanity check");
       vg_assert(block_sane);
@@ -4668,6 +4558,7 @@
    return eip - eip0;
 }
 
+#undef dis
 
 /*--------------------------------------------------------------------*/
 /*--- end                                            vg_to_ucode.c ---*/
diff --git a/vg_translate.c b/vg_translate.c
index 68d9faf..cd52c65 100644
--- a/vg_translate.c
+++ b/vg_translate.c
@@ -26,79 +26,20 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
 
-
 /*------------------------------------------------------------*/
 /*--- Renamings of frequently-used global functions.       ---*/
 /*------------------------------------------------------------*/
 
-#define uInstr1   VG_(newUInstr1)
 #define uInstr2   VG_(newUInstr2)
-#define uInstr3   VG_(newUInstr3)
-#define dis       VG_(disassemble)
 #define nameIReg  VG_(nameOfIntReg)
 #define nameISize VG_(nameOfIntSize)
-#define uLiteral  VG_(setLiteralField)
-#define newTemp   VG_(getNewTemp)
-#define newShadow VG_(getNewShadow)
 
-
-/*------------------------------------------------------------*/
-/*--- Memory management for the translater.                ---*/
-/*------------------------------------------------------------*/
-
-#define N_JITBLOCKS    4
-#define N_JITBLOCK_SZ  5000
-
-static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
-static Bool  jitstorage_inuse[N_JITBLOCKS];
-static Bool  jitstorage_initdone = False;
-
-static __inline__ void jitstorage_initialise ( void )
-{
-   Int i;
-   if (jitstorage_initdone) return;
-   jitstorage_initdone = True;
-   for (i = 0; i < N_JITBLOCKS; i++)
-      jitstorage_inuse[i] = False; 
-}
-
-void* VG_(jitmalloc) ( Int nbytes )
-{
-   Int i;
-   jitstorage_initialise();
-   if (nbytes > N_JITBLOCK_SZ) {
-      /* VG_(printf)("too large: %d\n", nbytes); */
-      return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-   }
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (!jitstorage_inuse[i]) {
-         jitstorage_inuse[i] = True;
-         /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
-         return & jitstorage[i][0];
-      }
-   }
-   VG_(panic)("out of slots in vg_jitmalloc\n");
-   return VG_(malloc)(VG_AR_PRIVATE, nbytes);
-}
-
-void VG_(jitfree) ( void* ptr )
-{
-   Int i;
-   jitstorage_initialise();
-   for (i = 0; i < N_JITBLOCKS; i++) {
-      if (ptr == & jitstorage[i][0]) {
-         vg_assert(jitstorage_inuse[i]);
-         jitstorage_inuse[i] = False;
-         return;
-      }
-   }
-   VG_(free)(VG_AR_PRIVATE, ptr);
-}
+#define dis       VG_(print_codegen)
 
 /*------------------------------------------------------------*/
 /*--- Basics                                               ---*/
@@ -106,7 +47,7 @@
 
 UCodeBlock* VG_(allocCodeBlock) ( void )
 {
-   UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
+   UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
    cb->used = cb->size = cb->nextTemp = 0;
    cb->instrs = NULL;
    return cb;
@@ -115,8 +56,8 @@
 
 void VG_(freeCodeBlock) ( UCodeBlock* cb )
 {
-   if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
-   VG_(free)(VG_AR_PRIVATE, cb);
+   if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
+   VG_(arena_free)(VG_AR_CORE, cb);
 }
 
 
@@ -129,15 +70,15 @@
          vg_assert(cb->size == 0);
          vg_assert(cb->used == 0);
          cb->size = 8;
-         cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
+         cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
       } else {
          Int i;
-         UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, 
+         UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE, 
                                        2 * sizeof(UInstr) * cb->size);
          for (i = 0; i < cb->used; i++)
             instrs2[i] = cb->instrs[i];
          cb->size *= 2;
-         VG_(free)(VG_AR_PRIVATE, cb->instrs);
+         VG_(arena_free)(VG_AR_CORE, cb->instrs);
          cb->instrs = instrs2;
       }
    }
@@ -147,18 +88,20 @@
 
 
 __inline__ 
-void VG_(emptyUInstr) ( UInstr* u )
+void VG_(newNOP) ( UInstr* u )
 {
    u->val1 = u->val2 = u->val3 = 0;
    u->tag1 = u->tag2 = u->tag3 = NoValue;
    u->flags_r = u->flags_w = FlagsEmpty;
    u->jmpkind = JmpBoring;
-   u->smc_check = u->signed_widen = False;
+   u->signed_widen = u->has_ret_val = False;
+   u->regs_live_after = ALL_RREGS_LIVE;
    u->lit32    = 0;
-   u->opcode   = 0;
+   u->opcode   = NOP;
    u->size     = 0;
    u->cond     = 0;
    u->extra4b  = 0;
+   u->argc = u->regparms_n = 0;
 }
 
 
@@ -174,7 +117,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->val3   = val3;
@@ -198,7 +141,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->val2   = val2;
    ui->opcode = opcode;
@@ -218,7 +161,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->val1   = val1;
    ui->opcode = opcode;
    ui->tag1   = tag1;
@@ -234,7 +177,7 @@
    ensureUInstr(cb);
    ui = & cb->instrs[cb->used];
    cb->used++;
-   VG_(emptyUInstr)(ui);
+   VG_(newNOP)(ui);
    ui->opcode = opcode;
    ui->size   = sz;
 }
@@ -252,13 +195,16 @@
 static __inline__ 
 void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
 {
-   dst->cond          = src->cond;
-   dst->extra4b       = src->extra4b;
-   dst->smc_check     = src->smc_check;
-   dst->signed_widen  = src->signed_widen;
-   dst->jmpkind       = src->jmpkind;
-   dst->flags_r       = src->flags_r;
-   dst->flags_w       = src->flags_w;
+   dst->cond            = src->cond;
+   dst->extra4b         = src->extra4b;
+   dst->signed_widen    = src->signed_widen;
+   dst->jmpkind         = src->jmpkind;
+   dst->flags_r         = src->flags_r;
+   dst->flags_w         = src->flags_w;
+   dst->argc            = src->argc;
+   dst->regparms_n      = src->regparms_n;
+   dst->has_ret_val     = src->has_ret_val;
+   dst->regs_live_after = src->regs_live_after;
 }
 
 
@@ -280,44 +226,85 @@
 }
 
 
+/* Set the C call info fields of the most recent uinsn. */
+void  VG_(setCCallFields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
+                            regparms_n, Bool has_ret_val )
+{
+   vg_assert(argc       <  4);
+   vg_assert(regparms_n <= argc);
+   LAST_UINSTR(cb).lit32       = fn;
+   LAST_UINSTR(cb).argc        = argc;
+   LAST_UINSTR(cb).regparms_n  = regparms_n;
+   LAST_UINSTR(cb).has_ret_val = has_ret_val;
+}
+
 Bool VG_(anyFlagUse) ( UInstr* u )
 {
    return (u->flags_r != FlagsEmpty 
            || u->flags_w != FlagsEmpty);
 }
 
-
-
+#if 1
+#  define BEST_ALLOC_ORDER
+#endif
 
 /* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
    register number.  This effectively defines the order in which real
    registers are allocated.  %ebp is excluded since it is permanently
-   reserved for pointing at VG_(baseBlock).  %edi is a general spare
-   temp used for Left4 and various misc tag ops.
+   reserved for pointing at VG_(baseBlock).
 
-   Important!  If you change the set of allocatable registers from
-   %eax, %ebx, %ecx, %edx, %esi you must change the
-   save/restore sequences in various places to match!  
+   Important!  This function must correspond with the value of
+   VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
+   a problem, except the generated code will obviously be worse).
 */
-__inline__ Int VG_(rankToRealRegNo) ( Int rank )
+__inline__ 
+Int VG_(rankToRealRegNum) ( Int rank )
 {
    switch (rank) {
-#     if 1
+#     ifdef BEST_ALLOC_ORDER
       /* Probably the best allocation ordering. */
       case 0: return R_EAX;
       case 1: return R_EBX;
       case 2: return R_ECX;
       case 3: return R_EDX;
       case 4: return R_ESI;
+      case 5: return R_EDI;
 #     else
       /* Contrary; probably the worst.  Helpful for debugging, tho. */
-      case 4: return R_EAX;
-      case 3: return R_EBX;
-      case 2: return R_ECX;
-      case 1: return R_EDX;
-      case 0: return R_ESI;
+      case 5: return R_EAX;
+      case 4: return R_EBX;
+      case 3: return R_ECX;
+      case 2: return R_EDX;
+      case 1: return R_ESI;
+      case 0: return R_EDI;
 #     endif
-      default: VG_(panic)("rankToRealRegNo");
+      default: VG_(panic)("VG_(rankToRealRegNum)");
+   }
+}
+
+/* Convert an Intel register number into a rank in the range 0 ..
+   VG_MAX_REALREGS-1.  See related comments for rankToRealRegNum()
+   above.  */
+__inline__
+Int VG_(realRegNumToRank) ( Int realReg )
+{
+   switch (realReg) {
+#     ifdef BEST_ALLOC_ORDER
+      case R_EAX: return 0;
+      case R_EBX: return 1;
+      case R_ECX: return 2;
+      case R_EDX: return 3;
+      case R_ESI: return 4;
+      case R_EDI: return 5;
+#     else
+      case R_EAX: return 5;
+      case R_EBX: return 4;
+      case R_ECX: return 3;
+      case R_EDX: return 2;
+      case R_ESI: return 1;
+      case R_EDI: return 0;
+#     endif
+      default: VG_(panic)("VG_(realRegNumToRank)");
    }
 }
 
@@ -382,78 +369,62 @@
    from the result of register allocation on the ucode efficiently and
    without need of any further RealRegs.
 
-   Restrictions on insns (as generated by the disassembler) are as
-   follows:
-
-      A=ArchReg   S=SpillNo   T=TempReg   L=Literal   R=RealReg
-      N=NoValue
-
-         GETF       T       N       N
-         PUTF       T       N       N
-
-         GET        A,S     T       N
-         PUT        T       A,S     N
-         LOAD       T       T       N
-         STORE      T       T       N
-         MOV        T,L     T       N
-         CMOV       T       T       N
-         WIDEN      T       N       N
-         JMP        T,L     N       N
-         CALLM      L       N       N
-         CALLM_S    N       N       N
-         CALLM_E    N       N       N
-         CCALL_1_0  T       N       N
-         CCALL_2_0  T       T       N
-         PUSH,POP   T       N       N
-         CLEAR      L       N       N
-
-         AND, OR
-                    T       T       N
-
-         ADD, ADC, XOR, SUB, SBB
-                    A,L,T   T       N
-
-         SHL, SHR, SAR, ROL, ROR, RCL, RCR
-                    L,T     T       N
-
-         NOT, NEG, INC, DEC, CC2VAL, BSWAP
-                    T       N       N
-
-         JIFZ       T       L       N
-
-         FPU_R      L       T       N
-         FPU_W      L       T       N
-         FPU        L       T       N
-
-         LEA1       T       T   (const in a seperate field)
-         LEA2       T       T       T   (const & shift ditto)
-
-         INCEIP     L       N       N
+   Restrictions for the individual UInstrs are clear from the checks below.
+   Abbreviations: A=ArchReg   S=SpillNo   T=TempReg   L=Literal
+                  Ls=Lit16    R=RealReg   N=NoValue
  
-   and for instrumentation insns:
-
-         LOADV      T       T       N
-         STOREV     T,L     T       N
-         GETV       A       T       N
-         PUTV       T,L     A       N
-         GETVF      T       N       N
-         PUTVF      T       N       N
-         WIDENV     T       N       N
-         TESTV      A,T     N       N
-         SETV       A,T     N       N
-         TAG1       T       N       N
-         TAG2       T       T       N
-
    Before register allocation, S operands should not appear anywhere.
    After register allocation, all T operands should have been
    converted into Rs, and S operands are allowed in GET and PUT --
    denoting spill saves/restores.  
 
+   Before liveness analysis, save_e[acd]x fields should all be True.
+   Afterwards, they may be False.
+
    The size field should be 0 for insns for which it is meaningless,
    ie those which do not directly move/operate on data.
 */
-Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
+Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
 {
+#  define LIT0 (u->lit32 == 0)
+#  define LIT1 (!(LIT0))
+#  define LITm (u->tag1 == Literal ? True : LIT0 )
+#  define SZ4  (u->size == 4)
+#  define SZ2  (u->size == 2)
+#  define SZ1  (u->size == 1)
+#  define SZ0  (u->size == 0)
+#  define SZ42 (u->size == 4 || u->size == 2)
+#  define SZi  (u->size == 4 || u->size == 2 || u->size == 1)
+#  define SZf  (  u->size ==  4 || u->size ==  8 || u->size ==   2     \
+               || u->size == 10 || u->size == 28 || u->size == 108)
+#  define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
+                      ? (u->size == 4) : True)
+
+/* For these ones, two cases:
+ *
+ * 1. They are transliterations of the corresponding x86 instruction, in
+ *    which case they should have its flags (except that redundant write
+ *    flags can be annulled by the optimisation pass).
+ *
+ * 2. They are being used generally for other purposes, eg. helping with a
+ *    'rep'-prefixed instruction, in which case should have empty flags .
+ */
+#  define emptyR (u->flags_r == FlagsEmpty)
+#  define emptyW (u->flags_w == FlagsEmpty)
+#  define CC0 (emptyR && emptyW)
+#  define CCr (u->flags_r == FlagsALL && emptyW)
+#  define CCw (emptyR &&  u->flags_w == FlagsALL)
+#  define CCa (emptyR && (u->flags_w == FlagsOSZACP  || emptyW))
+#  define CCc (emptyR && (u->flags_w == FlagsOC      || emptyW))
+#  define CCe (emptyR && (u->flags_w == FlagsOSZAP   || emptyW))
+#  define CCb ((u->flags_r==FlagC       || emptyR) && \
+               (u->flags_w==FlagsOSZACP || emptyW))
+#  define CCd ((u->flags_r==FlagC   || emptyR) && \
+               (u->flags_w==FlagsOC || emptyW))
+#  define CCf (CC0 || CCr || CCw)
+#  define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
+#  define CCj (u->cond==CondAlways ? CC0 : CCg)
+
 #  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
 #  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
 #  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
@@ -466,24 +437,29 @@
 #  define L2  (u->tag2 == Literal && u->val2 == 0)
 #  define Ls1 (u->tag1 == Lit16)
 #  define Ls3 (u->tag3 == Lit16)
+#  define TRL1 (TR1 || L1)
+#  define TRAL1 (TR1 || A1 || L1)
 #  define N1  (u->tag1 == NoValue)
 #  define N2  (u->tag2 == NoValue)
 #  define N3  (u->tag3 == NoValue)
-#  define SZ4 (u->size == 4)
-#  define SZ2 (u->size == 2)
-#  define SZ1 (u->size == 1)
-#  define SZ0 (u->size == 0)
-#  define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
-#  define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
-#  define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
-#  define FLG_RD_WR_MAYBE                                         \
-       ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)    \
-        || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP)   \
-        || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
-#  define CC1 (!(CC0))
-#  define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
-                      ? (u->size == 4) : True)
 
+#  define COND0    (u->cond         == 0)
+#  define EXTRA4b0 (u->extra4b      == 0)
+#  define SG_WD0   (u->signed_widen == 0)
+#  define JMPKIND0 (u->jmpkind      == 0)
+#  define CCALL0   (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
+                    ( beforeLiveness                                       \
+                    ? u->regs_live_after == ALL_RREGS_LIVE                 \
+                    : True ))
+
+#  define XCONDi   (         EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+#  define Xextra4b (COND0             && SG_WD0 && JMPKIND0 && CCALL0)
+#  define XWIDEN   (COND0                       && JMPKIND0 && CCALL0)
+#  define XJMP     (                     SG_WD0             && CCALL0)
+#  define XCCALL   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0          )
+#  define XOTHER   (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
+
+   /* 0 or 1 Literal args per UInstr */
    Int n_lits = 0;
    if (u->tag1 == Literal) n_lits++;
    if (u->tag2 == Literal) n_lits++;
@@ -491,94 +467,94 @@
    if (n_lits > 1) 
       return False;
 
+   /* Fields not checked: val1, val2, val3 */
+
    switch (u->opcode) {
-      case GETF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
-      case PUTF:
-         return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
-      case CALLM_S: case CALLM_E:
-         return SZ0 && N1 && N2 && N3;
-      case INCEIP:
-         return SZ0 && CC0 && Ls1 && N2 && N3;
-      case LEA1:
-         return CC0 && TR1 && TR2 && N3 && SZ4;
-      case LEA2:
-         return CC0 && TR1 && TR2 && TR3 && SZ4;
-      case NOP: 
-         return SZ0 && CC0 && N1 && N2 && N3;
-      case GET: 
-         return CC0 && AS1 && TR2 && N3;
-      case PUT: 
-         return CC0 && TR1 && AS2 && N3;
-      case LOAD: case STORE: 
-         return CC0 && TR1 && TR2 && N3;
-      case MOV:
-         return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
-      case CMOV:
-         return CC1 && TR1 && TR2 && N3 && SZ4;
-      case JMP: 
-         return (u->cond==CondAlways ? CC0 : CC1)
-                && (TR1 || L1) && N2 && SZ0 && N3;
-      case CLEAR:
-         return CC0 && Ls1 && N2 && SZ0 && N3;
-      case CALLM:
-         return SZ0 && Ls1 && N2 && N3;
-      case CCALL_1_0:
-         return SZ0 && CC0 && TR1 && N2 && N3;
-      case CCALL_2_0:
-         return SZ0 && CC0 && TR1 && TR2 && N3;
-      case PUSH: case POP:
-         return CC0 && TR1 && N2 && N3;
-      case AND: case OR:
-         return TR1 && TR2 && N3;
-      case ADD: case ADC: case XOR: case SUB: case SBB:
-         return (A1 || TR1 || L1) && TR2 && N3;
-      case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
-         return       (TR1 || L1) && TR2 && N3;
-      case NOT: case NEG: case INC: case DEC:
-         return        TR1 && N2 && N3;
-      case BSWAP:
-         return TR1 && N2 && N3 && CC0 && SZ4;
-      case CC2VAL: 
-         return CC1 && SZ1 && TR1 && N2 && N3;
-      case JIFZ:
-         return CC0 && SZ4 && TR1 && L2 && N3;
-      case FPU_R:  case FPU_W: 
-         return CC0 && Ls1 && TR2 && N3;
-      case FPU: 
-         return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
-      case LOADV:
-         return CC0 && TR1 && TR2 && N3;
-      case STOREV:
-         return CC0 && (TR1 || L1) && TR2 && N3;
-      case GETV: 
-         return CC0 && A1 && TR2 && N3;
-      case PUTV: 
-         return CC0 && (TR1 || L1) && A2 && N3;
-      case GETVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case PUTVF: 
-         return CC0 && TR1 && N2 && N3 && SZ0;
-      case WIDEN:
-         return CC0 && TR1 && N2 && N3;
-      case TESTV: 
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case SETV:
-         return CC0 && (A1 || TR1) && N2 && N3;
-      case TAG1:
-         return CC0 && TR1 && N2 && Ls3 && SZ0;
-      case TAG2:
-         return CC0 && TR1 && TR2 && Ls3 && SZ0;
-      default: 
-         VG_(panic)("vg_saneUInstr: unhandled opcode");
+
+   /* Fields checked: lit32   size  flags_r/w tag1   tag2   tag3    (rest) */
+   case NOP:    return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case GETF:   return LIT0 && SZ42 && CCr &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case PUTF:   return LIT0 && SZ42 && CCw &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case GET:    return LIT0 && SZi  && CC0 &&  AS1 && TR2 &&  N3 && XOTHER;
+   case PUT:    return LIT0 && SZi  && CC0 &&  TR1 && AS2 &&  N3 && XOTHER;
+   case LOAD: 
+   case STORE:  return LIT0 && SZi  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case MOV:    return LITm && SZ4m && CC0 && TRL1 && TR2 &&  N3 && XOTHER;
+   case CMOV:   return LIT0 && SZ4  && CCg &&  TR1 && TR2 &&  N3 && XCONDi;
+   case WIDEN:  return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XWIDEN;
+   case JMP:    return LITm && SZ0  && CCj && TRL1 &&  N2 &&  N3 && XJMP;
+   case CALLM:  return LIT0 && SZ0 /*any*/ &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CALLM_S: 
+   case CALLM_E:return LIT0 && SZ0  && CC0 &&   N1 &&  N2 &&  N3 && XOTHER;
+   case PUSH: 
+   case POP:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CLEAR:  return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case AND:
+   case OR:     return LIT0 && SZi  && CCa &&  TR1 && TR2 &&  N3 && XOTHER;
+   case ADD:
+   case XOR:
+   case SUB:    return LITm && SZi  && CCa &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SBB:
+   case ADC:    return LITm && SZi  && CCb &&TRAL1 && TR2 &&  N3 && XOTHER;
+   case SHL:
+   case SHR:
+   case SAR:    return LITm && SZi  && CCa && TRL1 && TR2 &&  N3 && XOTHER;
+   case ROL:
+   case ROR:    return LITm && SZi  && CCc && TRL1 && TR2 &&  N3 && XOTHER;
+   case RCL:
+   case RCR:    return LITm && SZi  && CCd && TRL1 && TR2 &&  N3 && XOTHER;
+   case NOT:    return LIT0 && SZi  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case NEG:    return LIT0 && SZi  && CCa &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case INC:
+   case DEC:    return LIT0 && SZi  && CCe &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case CC2VAL: return LIT0 && SZ1  && CCg &&  TR1 &&  N2 &&  N3 && XCONDi;
+   case BSWAP:  return LIT0 && SZ4  && CC0 &&  TR1 &&  N2 &&  N3 && XOTHER;
+   case JIFZ:   return LIT1 && SZ4  && CC0 &&  TR1 &&  L2 &&  N3 && XOTHER;
+   case FPU_R:  
+   case FPU_W:  return LIT0 && SZf  && CC0 &&  Ls1 && TR2 &&  N3 && XOTHER;
+   case FPU:    return LIT0 && SZ0  && CCf &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case LEA1:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 &&  N3 && XOTHER;
+   case LEA2:   return /*any*/ SZ4  && CC0 &&  TR1 && TR2 && TR3 && Xextra4b;
+   case INCEIP: return LIT0 && SZ0  && CC0 &&  Ls1 &&  N2 &&  N3 && XOTHER;
+   case CCALL:  return LIT1 && SZ0  && CC0 && 
+                       (u->argc > 0                   ? TR1 : N1) && 
+                       (u->argc > 1                   ? TR2 : N2) && 
+                       (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
+                       u->regparms_n <= u->argc && XCCALL;
+   default: 
+      if (VG_(needs).extended_UCode)
+         return SK_(saneExtUInstr)(beforeRA, beforeLiveness, u);
+      else {
+         VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                     "VG_(needs).extended_UCode should be set?",
+                     u->opcode);
+         VG_(panic)("VG_(saneUInstr): unhandled opcode");
+      }
    }
-#  undef SZ4_IF_TR1
-#  undef CC0
-#  undef CC1
+#  undef LIT0
+#  undef LIT1
+#  undef LITm
 #  undef SZ4
 #  undef SZ2
 #  undef SZ1
 #  undef SZ0
+#  undef SZ42
+#  undef SZi
+#  undef SZf
+#  undef SZ4m
+#  undef emptyR
+#  undef emptyW
+#  undef CC0
+#  undef CCr
+#  undef CCw
+#  undef CCa
+#  undef CCb
+#  undef CCc
+#  undef CCd
+#  undef CCe
+#  undef CCf
+#  undef CCg
+#  undef CCj
 #  undef TR1
 #  undef TR2
 #  undef TR3
@@ -588,20 +564,42 @@
 #  undef AS2
 #  undef AS3
 #  undef L1
-#  undef Ls1
 #  undef L2
+#  undef Ls1
 #  undef Ls3
+#  undef TRL1
+#  undef TRAL1
 #  undef N1
 #  undef N2
 #  undef N3
-#  undef FLG_RD
-#  undef FLG_WR
-#  undef FLG_RD_WR_MAYBE 
+#  undef COND0
+#  undef EXTRA4b0
+#  undef SG_WD0
+#  undef JMPKIND0
+#  undef CCALL0
+#  undef Xextra4b
+#  undef XWIDEN
+#  undef XJMP
+#  undef XCCALL
+#  undef XOTHER
 }
 
+void VG_(saneUCodeBlock) ( UCodeBlock* cb )
+{
+   Int i;
+        
+   for (i = 0; i < cb->used; i++) {
+      Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
+      if (!sane) {
+         VG_(printf)("Instruction failed sanity check:\n");
+         VG_(upUInstr)(i, &cb->instrs[i]);
+      }
+      vg_assert(sane);
+   }
+}
 
 /* Sanity checks to do with CALLMs in UCodeBlocks. */
-Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
+Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
 {
    Int  callm = 0;
    Int  callm_s = 0;
@@ -687,6 +685,9 @@
 /*--- Printing uinstrs.                                    ---*/
 /*------------------------------------------------------------*/
 
+/* Global that dictates whether to print generated code at all stages */
+Bool VG_(print_codegen);
+
 Char* VG_(nameCondcode) ( Condcode cond )
 {
    switch (cond) {
@@ -734,14 +735,14 @@
 }
 
 
-static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
+void VG_(ppUOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
 {
    UInt tag, val;
    switch (operandNo) {
       case 1: tag = u->tag1; val = u->val1; break;
       case 2: tag = u->tag2; val = u->val2; break;
       case 3: tag = u->tag3; val = u->val3; break;
-      default: VG_(panic)("ppUOperand(1)");
+      default: VG_(panic)("VG_(ppUOperand)(1)");
    }
    if (tag == Literal) val = u->lit32;
 
@@ -754,7 +755,7 @@
       case NoValue: VG_(printf)("NoValue"); break;
       case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
       case SpillNo: VG_(printf)("spill%d", val); break;
-      default: VG_(panic)("ppUOperand(2)");
+      default: VG_(panic)("VG_(ppUOperand)(2)");
    }
    if (parens) VG_(printf)(")");
 }
@@ -786,10 +787,6 @@
    }
    if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
    switch (opc) {
-      case GETVF:   return "GETVF";
-      case PUTVF:   return "PUTVF";
-      case TAG1:    return "TAG1";
-      case TAG2:    return "TAG2";
       case CALLM_S: return "CALLM_S";
       case CALLM_E: return "CALLM_E";
       case INCEIP:  return "INCEIP";
@@ -808,8 +805,7 @@
       case JMP:     return "J"    ;
       case JIFZ:    return "JIFZ" ;
       case CALLM:   return "CALLM";
-      case CCALL_1_0: return "CCALL_1_0";
-      case CCALL_2_0: return "CCALL_2_0";
+      case CCALL:   return "CCALL";
       case PUSH:    return "PUSH" ;
       case POP:     return "POP"  ;
       case CLEAR:   return "CLEAR";
@@ -817,18 +813,61 @@
       case FPU_R:   return "FPU_R";
       case FPU_W:   return "FPU_W";
       case FPU:     return "FPU"  ;
-      case LOADV:   return "LOADV";
-      case STOREV:  return "STOREV";
-      case GETV:    return "GETV";
-      case PUTV:    return "PUTV";
-      case TESTV:   return "TESTV";
-      case SETV:    return "SETV";
-      default:      VG_(panic)("nameUOpcode: unhandled case");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(nameExtUOpcode)(opc);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        opc);
+            VG_(panic)("nameUOpcode: unhandled opcode");
+         }
    }
 }
 
+void ppRealRegsLiveness ( UInstr* u )
+{
+#  define PRINT_RREG_LIVENESS(realReg,s) \
+     VG_(printf)( IS_RREG_LIVE(VG_(realRegNumToRank)(realReg), \
+                               u->regs_live_after)             \
+                     ? s : "-");
 
-void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+   VG_(printf)("[");
+   PRINT_RREG_LIVENESS(R_EAX, "a");
+   PRINT_RREG_LIVENESS(R_EBX, "b");
+   PRINT_RREG_LIVENESS(R_ECX, "c");
+   PRINT_RREG_LIVENESS(R_EDX, "d");
+   PRINT_RREG_LIVENESS(R_ESI, "S");
+   PRINT_RREG_LIVENESS(R_EDI, "D");
+   VG_(printf)("]");
+
+#  undef PRINT_RREG_LIVENESS
+}
+
+/* Ugly-print UInstr :) */
+void VG_(upUInstr) ( Int i, UInstr* u )
+{
+   VG_(ppUInstrWithRegs)(i, u);
+   
+   VG_(printf)("opcode:          %d\n", u->opcode);
+   VG_(printf)("lit32:           %x\n", u->lit32);
+   VG_(printf)("size:            %d\n", u->size);
+   VG_(printf)("val1,val2,val3:  %d, %d, %d\n", u->val1, u->val2, u->val3);
+   VG_(printf)("tag1,tag2,tag3:  %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
+   VG_(printf)("flags_r:         %x\n", u->flags_r);
+   VG_(printf)("flags_w:         %x\n", u->flags_w);
+   VG_(printf)("extra4b:         %x\n", u->extra4b);
+   VG_(printf)("cond:            %x\n", u->cond);
+   VG_(printf)("signed_widen:    %d\n", u->signed_widen);
+   VG_(printf)("jmpkind:         %d\n", u->jmpkind);
+   VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
+   VG_(printf)("has_ret_val:     %d\n", u->has_ret_val);
+   VG_(printf)("regs_live_after: ");
+   ppRealRegsLiveness(u);
+   VG_(printf)("\n");
+}
+
+void ppUInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
 {
    VG_(printf)("\t%4d: %s", instrNo, 
                             VG_(nameUOpcode)(True, u->opcode));
@@ -846,24 +885,6 @@
 
    switch (u->opcode) {
 
-      case TAG1:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(" )");
-         break;
-
-      case TAG2:
-         VG_(printf)("\t");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
-         ppUOperand(u, 1, 4, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
-         VG_(printf)(" )");
-         break;
-
       case CALLM_S: case CALLM_E:
          break;
 
@@ -873,18 +894,18 @@
 
       case LEA2:
          VG_(printf)("\t%d(" , u->lit32);
-         ppUOperand(u, 1, 4, False);
+         VG_(ppUOperand)(u, 1, 4, False);
          VG_(printf)(",");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          VG_(printf)(",%d), ", (Int)u->extra4b);
-         ppUOperand(u, 3, 4, False);
+         VG_(ppUOperand)(u, 3, 4, False);
          break;
 
       case LEA1:
          VG_(printf)("\t%d" , u->lit32);
-         ppUOperand(u, 1, 4, True);
+         VG_(ppUOperand)(u, 1, 4, True);
          VG_(printf)(", ");
-         ppUOperand(u, 2, 4, False);
+         VG_(ppUOperand)(u, 2, 4, False);
          break;
 
       case NOP:
@@ -893,12 +914,12 @@
       case FPU_W:
          VG_(printf)("\t0x%x:0x%x, ",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          break;
 
       case FPU_R:
          VG_(printf)("\t");
-         ppUOperand(u, 2, 4, True);
+         VG_(ppUOperand)(u, 2, 4, True);
          VG_(printf)(", 0x%x:0x%x",
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
@@ -908,97 +929,93 @@
                      (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
          break;
 
-      case STOREV: case LOADV:
       case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); 
+         VG_(ppUOperand)(u, 1, u->size, u->opcode==LOAD); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
+         VG_(ppUOperand)(u, 2, u->size, u->opcode==STORE);
+         break;
+
+      case JMP:
+         switch (u->jmpkind) {
+            case JmpCall:      VG_(printf)("-c"); break;
+            case JmpRet:       VG_(printf)("-r"); break;
+            case JmpSyscall:   VG_(printf)("-sys"); break;
+            case JmpClientReq: VG_(printf)("-cli"); break;
+            default: break;
+         }
+         VG_(printf)("\t");
+         VG_(ppUOperand)(u, 1, u->size, False);
+         if (CondAlways == u->cond) {
+            /* Print x86 instruction size if filled in */
+            if (0 != u->extra4b)
+               VG_(printf)("  ($%u)", u->extra4b);
+         }
          break;
 
       case GETF: case PUTF:
+      case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case JMP: case CC2VAL:
-      case PUSH: case POP: case CLEAR: case CALLM:
-         if (u->opcode == JMP) {
-            switch (u->jmpkind) {
-               case JmpCall:      VG_(printf)("-c"); break;
-               case JmpRet:       VG_(printf)("-r"); break;
-               case JmpSyscall:   VG_(printf)("-sys"); break;
-               case JmpClientReq: VG_(printf)("-cli"); break;
-               default: break;
-            }
+      /* Print a "(s)" after args passed on stack */
+      case CCALL:
+         VG_(printf)("\t");
+         if (u->has_ret_val) {
+            VG_(ppUOperand)(u, 3, 0, False);
+            VG_(printf)(" = ");
          }
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      case CCALL_1_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
-         break;
-
-      case CCALL_2_0:
-         VG_(printf)(" ");
-         ppUOperand(u, 1, 0, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, 0, False);
-         VG_(printf)(" (%u)", u->lit32);
+         VG_(printf)("%p(", u->lit32);
+         if (u->argc > 0) {
+            VG_(ppUOperand)(u, 1, 0, False);
+            if (u->regparms_n < 1)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 1) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 2, 0, False);
+            if (u->regparms_n < 2)
+               VG_(printf)("(s)");
+         }
+         if (u->argc > 2) {
+            VG_(printf)(", ");
+            VG_(ppUOperand)(u, 3, 0, False);
+            if (u->regparms_n < 3)
+               VG_(printf)("(s)");
+         }
+         VG_(printf)(") ");
          break;
 
       case JIFZ:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case PUTVF: case GETVF:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, 0, False); 
-         break;
-
-      case NOT: case NEG: case INC: case DEC: case BSWAP:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
-         break;
-
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
       case SHL: case SHR: case SAR: 
       case ROL: case ROR: case RCL: case RCR:   
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False); 
+         VG_(ppUOperand)(u, 1, u->size, False); 
          VG_(printf)(", ");
-         ppUOperand(u, 2, u->size, False);
-         break;
-
-      case GETV: case PUTV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
-         VG_(printf)(", ");
-         ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
+         VG_(ppUOperand)(u, 2, u->size, False);
          break;
 
       case WIDEN:
          VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
                               u->signed_widen?'s':'z');
          VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
+         VG_(ppUOperand)(u, 1, u->size, False);
          break;
 
-      case TESTV: case SETV:
-         VG_(printf)("\t");
-         ppUOperand(u, 1, u->size, False);
-         break;
-
-      default: VG_(panic)("ppUInstr: unhandled opcode");
+      default: 
+         if (VG_(needs).extended_UCode)
+            SK_(ppExtUInstr)(u);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("ppUInstr: unhandled opcode");
+         }
    }
-
    if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
       VG_(printf)("  (");
       if (u->flags_r != FlagsEmpty) 
@@ -1007,16 +1024,31 @@
          vg_ppFlagSet("-w", u->flags_w);
       VG_(printf)(")");
    }
+
+   if (ppRegsLiveness) {
+      VG_(printf)("\t\t");
+      ppRealRegsLiveness ( u );
+   }
+
    VG_(printf)("\n");
 }
 
+void VG_(ppUInstr) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
+}
+
+void VG_(ppUInstrWithRegs) ( Int instrNo, UInstr* u )
+{
+   ppUInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
+}
 
 void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
 {
    Int i;
-   VG_(printf)("\n%s\n", title);
+   VG_(printf)("%s\n", title);
    for (i = 0; i < cb->used; i++)
-      if (0 || cb->instrs[i].opcode != NOP)
+      if (cb->instrs[i].opcode != NOP)
          VG_(ppUInstr) ( i, &cb->instrs[i] );
    VG_(printf)("\n");
 }
@@ -1027,43 +1059,35 @@
 /*--- and code improvement.                                ---*/
 /*------------------------------------------------------------*/
 
-/* A structure for communicating temp uses, and for indicating
-   temp->real register mappings for patchUInstr. */
-typedef
-   struct {
-      Int   realNo;
-      Int   tempNo;
-      Bool  isWrite;
-   }
-   TempUse;
-
-
-/* Get the temp use of a uinstr, parking them in an array supplied by
+/* Get the temp/reg use of a uinstr, parking them in an array supplied by
    the caller, which is assumed to be big enough.  Return the number
    of entries.  Insns which read _and_ write a register wind up
    mentioning it twice.  Entries are placed in the array in program
    order, so that if a reg is read-modified-written, it appears first
-   as a read and then as a write.  
+   as a read and then as a write.  'tag' indicates whether we are looking at
+   TempRegs or RealRegs.
 */
-static __inline__ 
-Int getTempUsage ( UInstr* u, TempUse* arr )
+__inline__
+Int VG_(getRegUsage) ( UInstr* u, Tag tag, RegUse* arr )
 {
-
-#  define RD(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = False; n++; }
-#  define WR(ono)                                  \
-      if (mycat(u->tag,ono) == TempReg)            \
-         { arr[n].tempNo  = mycat(u->val,ono);     \
-           arr[n].isWrite = True; n++; }
+#  define RD(ono)    VG_UINSTR_READS_REG(ono)
+#  define WR(ono)    VG_UINSTR_WRITES_REG(ono)
 
    Int n = 0;
    switch (u->opcode) {
       case LEA1: RD(1); WR(2); break;
       case LEA2: RD(1); RD(2); WR(3); break;
 
-      case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
+      case NOP:   case FPU:   case INCEIP: case CALLM_S: case CALLM_E:
+      case CLEAR: case CALLM: break;
+
+      case CCALL:
+         if (u->argc > 0)    RD(1); 
+         if (u->argc > 1)    RD(2); 
+         if (u->argc > 2)    RD(3); 
+         if (u->has_ret_val) WR(3);
+         break;
+
       case FPU_R: case FPU_W: RD(2); break;
 
       case GETF:  WR(1); break;
@@ -1072,16 +1096,14 @@
       case GET:   WR(2); break;
       case PUT:   RD(1); break;
       case LOAD:  RD(1); WR(2); break;
-      case STORE: case CCALL_2_0: RD(1); RD(2); break;
+      case STORE: RD(1); RD(2); break;
       case MOV:   RD(1); WR(2); break;
 
       case JMP:   RD(1); break;
-      case CLEAR: case CALLM: break;
 
-      case PUSH: case CCALL_1_0: RD(1); break;
+      case PUSH: RD(1); break;
       case POP:  WR(1); break;
 
-      case TAG2:
       case CMOV:
       case ADD: case ADC: case AND: case OR:  
       case XOR: case SUB: case SBB:   
@@ -1091,7 +1113,7 @@
       case ROL: case ROR: case RCL: case RCR:
          RD(1); RD(2); WR(2); break;
 
-      case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
+      case NOT: case NEG: case INC: case DEC: case BSWAP:
          RD(1); WR(1); break;
 
       case WIDEN: RD(1); WR(1); break;
@@ -1099,19 +1121,15 @@
       case CC2VAL: WR(1); break;
       case JIFZ: RD(1); break;
 
-      /* These sizes are only ever consulted when the instrumentation
-         code is being added, so the following can return
-         manifestly-bogus sizes. */
-      case LOADV:   RD(1); WR(2); break;
-      case STOREV:  RD(1); RD(2); break;
-      case GETV:    WR(2); break;
-      case PUTV:    RD(1); break;
-      case TESTV:   RD(1); break;
-      case SETV:    WR(1); break;
-      case PUTVF:   RD(1); break;
-      case GETVF:   WR(1); break;
-
-      default: VG_(panic)("getTempUsage: unhandled opcode");
+      default:
+         if (VG_(needs).extended_UCode)
+            return SK_(getExtRegUsage)(u, tag, arr);
+         else {
+            VG_(printf)("unhandled opcode: %u.  Perhaps " 
+                        "VG_(needs).extended_UCode should be set?",
+                        u->opcode);
+            VG_(panic)("VG_(getRegUsage): unhandled opcode");
+         }
    }
    return n;
 
@@ -1120,31 +1138,32 @@
 }
 
 
-/* Change temp regs in u into real regs, as directed by tmap. */
-static __inline__ 
-void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
+/* Change temp regs in u into real regs, as directed by the
+ * temps[i]-->reals[i] mapping. */
+static __inline__
+void patchUInstr ( UInstr* u, RegUse temps[], UInt reals[], Int n_tmap )
 {
    Int i;
    if (u->tag1 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val1) break;
+         if (temps[i].num == u->val1) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(1)");
       u->tag1 = RealReg;
-      u->val1 = tmap[i].realNo;
+      u->val1 = reals[i];
    }
    if (u->tag2 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val2) break;
+         if (temps[i].num == u->val2) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(2)");
       u->tag2 = RealReg;
-      u->val2 = tmap[i].realNo;
+      u->val2 = reals[i];
    }
    if (u->tag3 == TempReg) {
       for (i = 0; i < n_tmap; i++)
-         if (tmap[i].tempNo == u->val3) break;
+         if (temps[i].num == u->val3) break;
       if (i == n_tmap) VG_(panic)("patchUInstr(3)");
       u->tag3 = RealReg;
-      u->val3 = tmap[i].realNo;
+      u->val3 = reals[i];
    }
 }
 
@@ -1166,7 +1185,9 @@
 
 
 /* If u reads an ArchReg, return the number of the containing arch
-   reg.  Otherwise return -1.  Used in redundant-PUT elimination. */
+   reg.  Otherwise return -1.  Used in redundant-PUT elimination.
+   Note that this is not required for skins extending UCode because
+   this happens before instrumentation. */
 static __inline__ 
 Int maybe_uinstrReadsArchReg ( UInstr* u )
 {
@@ -1211,10 +1232,10 @@
 Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
 {
    Int i, k;
-   TempUse tempUse[3];
-   k = getTempUsage ( u, &tempUse[0] );
+   RegUse tempUse[3];
+   k = VG_(getRegUsage) ( u, TempReg, &tempUse[0] );
    for (i = 0; i < k; i++)
-      if (tempUse[i].tempNo == tempreg)
+      if (tempUse[i].num == tempreg)
          return True;
    return False;
 }
@@ -1236,14 +1257,18 @@
    Int     i, j, k, m, n, ar, tr, told, actual_areg;
    Int     areg_map[8];
    Bool    annul_put[8];
-   TempUse tempUse[3];
+   RegUse  tempUse[3];
    UInstr* u;
    Bool    wr;
    Int*    last_live_before;
    FlagSet future_dead_flags;
 
+   if (dis) 
+      VG_(printf) ("Improvements:\n");
+
    if (cb->nextTemp > 0)
-      last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
+      last_live_before = VG_(arena_malloc) ( VG_AR_JITTER, 
+                                             cb->nextTemp * sizeof(Int) );
    else
       last_live_before = NULL;
 
@@ -1259,11 +1284,11 @@
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      k = getTempUsage(u, &tempUse[0]);
+      k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
       /* For each temp usage ... bwds in program order. */
       for (j = k-1; j >= 0; j--) {
-         tr = tempUse[j].tempNo;
+         tr = tempUse[j].num;
          wr = tempUse[j].isWrite;
          if (last_live_before[tr] == -1) {
             vg_assert(tr >= 0 && tr < cb->nextTemp);
@@ -1300,15 +1325,14 @@
                out here.  Annul this GET, rename tr to told for the
                rest of the block, and extend told's live range to that
                of tr.  */
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
+            VG_(newNOP)(u);
             n = last_live_before[tr] + 1;
             if (n > cb->used) n = cb->used;
             last_live_before[told] = last_live_before[tr];
             last_live_before[tr] = i-1;
-            if (VG_(disassemble))
+            if (dis)
                VG_(printf)(
-                  "at %d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
+                  "   at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n", 
                   i, tr, told,i+1, n-1);
             for (m = i+1; m < n; m++) {
                if (cb->instrs[m].tag1 == TempReg 
@@ -1349,9 +1373,9 @@
                case ADC: case SBB:
                case SHL: case SHR: case SAR: case ROL: case ROR:
                case RCL: case RCR:
-                  if (VG_(disassemble)) 
+                  if (dis) 
                      VG_(printf)(
-                        "at %d: change ArchReg %S to TempReg t%d\n", 
+                        "   at %2d: change ArchReg %S to TempReg t%d\n", 
                         i, nameIReg(4,u->val1), areg_map[u->val1]);
                   u->tag1 = TempReg;
                   u->val1 = areg_map[u->val1];
@@ -1366,12 +1390,12 @@
          }
 
          /* boring insn; invalidate any mappings to temps it writes */
-         k = getTempUsage(u, &tempUse[0]);
+         k = VG_(getRegUsage)(u, TempReg, &tempUse[0]);
 
          for (j = 0; j < k; j++) {
             wr  = tempUse[j].isWrite;
             if (!wr) continue;
-            tr = tempUse[j].tempNo;
+            tr = tempUse[j].num;
             for (m = 0; m < 8; m++)
                if (areg_map[m] == tr) areg_map[m] = -1;
          }
@@ -1398,10 +1422,9 @@
          actual_areg = containingArchRegOf ( 4, u->val2 );
          if (annul_put[actual_areg]) {
             vg_assert(actual_areg != R_ESP);
-            u->opcode = NOP;
-            u->tag1 = u->tag2 = NoValue;
-            if (VG_(disassemble)) 
-               VG_(printf)("at %d: delete PUT\n", i );
+            VG_(newNOP)(u);
+            if (dis) 
+               VG_(printf)("   at %2d: delete PUT\n", i );
          } else {
             if (actual_areg != R_ESP)
                annul_put[actual_areg] = True;
@@ -1443,9 +1466,9 @@
       vg_assert(u->tag1 == TempReg);
       vg_assert(u->tag2 == TempReg);
       if (last_live_before[u->val1] == i) {
-         if (VG_(disassemble))
+         if (dis)
             VG_(printf)(
-               "at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
+               "   at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
                i, u->val2, u->val1, i+1, last_live_before[u->val2] );
          for (j = i+1; j <= last_live_before[u->val2]; j++) {
             if (cb->instrs[j].tag1 == TempReg 
@@ -1457,8 +1480,7 @@
          }
          last_live_before[u->val1] = last_live_before[u->val2];
          last_live_before[u->val2] = i-1;
-         u->opcode = NOP;
-         u->tag1 = u->tag2 = NoValue;
+         VG_(newNOP)(u);
       }
    }
 
@@ -1495,8 +1517,8 @@
          this insn.*/
       if (u->flags_w != FlagsEmpty
           && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
-         if (VG_(disassemble)) {
-            VG_(printf)("at %d: annul flag write ", i);
+         if (dis) {
+            VG_(printf)("   at %2d: annul flag write ", i);
             vg_ppFlagSet("", u->flags_w);
             VG_(printf)(" due to later ");
             vg_ppFlagSet("", future_dead_flags);
@@ -1515,7 +1537,12 @@
    }
 
    if (last_live_before) 
-      VG_(jitfree) ( last_live_before );
+      VG_(arena_free) ( VG_AR_JITTER, last_live_before );
+
+   if (dis) {
+      VG_(printf)("\n");
+      VG_(ppUCodeBlock) ( cb, "Improved UCode:" );
+   }
 }
 
 
@@ -1570,7 +1597,8 @@
    Int          ss_busy_until_before[VG_MAX_SPILLSLOTS];
    Int          i, j, k, m, r, tno, max_ss_no;
    Bool         wr, defer, isRead, spill_reqd;
-   TempUse      tempUse[3];
+   UInt         realUse[3];
+   RegUse       tempUse[3];
    UCodeBlock*  c2;
 
    /* Used to denote ... well, "no value" in this fn. */
@@ -1578,7 +1606,8 @@
 
    /* Initialise the TempReg info.  */
    if (c1->nextTemp > 0)
-      temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
+      temp_info = VG_(arena_malloc)(VG_AR_JITTER,
+                                    c1->nextTemp * sizeof(TempInfo) );
    else
       temp_info = NULL;
 
@@ -1594,12 +1623,12 @@
    /* Scan fwds to establish live ranges. */
 
    for (i = 0; i < c1->used; i++) {
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each temp usage ... fwds in program order */
       for (j = 0; j < k; j++) {
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          wr  = tempUse[j].isWrite;
          if (wr) {
             /* Writes hold a reg live until after this insn. */
@@ -1662,26 +1691,30 @@
 
    /* Show live ranges and assigned spill slot nos. */
 
-   if (VG_(disassemble)) {
-      VG_(printf)("Live Range Assignments\n");
+   if (dis) {
+      VG_(printf)("Live range assignments:\n");
 
       for (i = 0; i < c1->nextTemp; i++) {
          if (temp_info[i].live_after == VG_NOTHING) 
             continue;
          VG_(printf)(
-            "   LR %d is   after %d to before %d   spillno %d\n",
+            "   LR %d is  after %d to before %d\tspillno %d\n",
             i,
             temp_info[i].live_after,
             temp_info[i].dead_before,
             temp_info[i].spill_no
          );
       }
+      VG_(printf)("\n");
    }
 
    /* Now that we've established a spill slot number for each used
       temporary, we can go ahead and do the core of the "Second-chance
       binpacking" allocation algorithm. */
 
+   if (dis) VG_(printf)("Register allocated UCode:\n");
+      
+
    /* Resulting code goes here.  We generate it all in a forwards
       pass. */
    c2 = VG_(allocCodeBlock)();
@@ -1694,9 +1727,6 @@
    for (i = 0; i < c1->nextTemp; i++)
       temp_info[i].real_no = VG_NOTHING;
 
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-
    /* Process each insn in turn. */
    for (i = 0; i < c1->used; i++) {
 
@@ -1721,14 +1751,14 @@
       }
 #     endif
 
-      if (VG_(disassemble))
+      if (dis)
          VG_(ppUInstr)(i, &c1->instrs[i]);
 
       /* First, free up enough real regs for this insn.  This may
          generate spill stores since we may have to evict some TempRegs
          currently in real regs.  Also generates spill loads. */
 
-      k = getTempUsage(&c1->instrs[i], &tempUse[0]);
+      k = VG_(getRegUsage)(&c1->instrs[i], TempReg, &tempUse[0]);
       vg_assert(k >= 0 && k <= 3);
 
       /* For each ***different*** temp mentioned in the insn .... */
@@ -1739,14 +1769,14 @@
             used by the insn once, even if it is mentioned more than
             once. */
          defer = False;
-         tno = tempUse[j].tempNo;
+         tno = tempUse[j].num;
          for (m = j+1; m < k; m++)
-            if (tempUse[m].tempNo == tno) 
+            if (tempUse[m].num == tno) 
                defer = True;
          if (defer) 
             continue;
 
-         /* Now we're trying to find a register for tempUse[j].tempNo.
+         /* Now we're trying to find a register for tempUse[j].num.
             First of all, if it already has a register assigned, we
             don't need to do anything more. */
          if (temp_info[tno].real_no != VG_NOTHING)
@@ -1772,7 +1802,7 @@
 
             Select r in 0 .. VG_MAX_REALREGS-1 such that
             real_to_temp[r] is not mentioned in 
-            tempUse[0 .. k-1].tempNo, since it would be just plain 
+            tempUse[0 .. k-1].num, since it would be just plain 
             wrong to eject some other TempReg which we need to use in 
             this insn.
 
@@ -1783,7 +1813,7 @@
          for (r = 0; r < VG_MAX_REALREGS; r++) {
             is_spill_cand[r] = True;
             for (m = 0; m < k; m++) {
-               if (real_to_temp[r] == tempUse[m].tempNo) {
+               if (real_to_temp[r] == tempUse[m].num) {
                   is_spill_cand[r] = False;
                   break;
                }
@@ -1834,28 +1864,28 @@
          temp_info[real_to_temp[r]].real_no = VG_NOTHING;
          if (temp_info[real_to_temp[r]].dead_before > i) {
             uInstr2(c2, PUT, 4, 
-                        RealReg, VG_(rankToRealRegNo)(r), 
+                        RealReg, VG_(rankToRealRegNum)(r), 
                         SpillNo, temp_info[real_to_temp[r]].spill_no);
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
          /* Decide if tno is read. */
          isRead = False;
          for (m = 0; m < k; m++)
-            if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) 
+            if (tempUse[m].num == tno && !tempUse[m].isWrite) 
                isRead = True;
 
          /* If so, generate a spill load. */
          if (isRead) {
             uInstr2(c2, GET, 4, 
                         SpillNo, temp_info[tno].spill_no, 
-                        RealReg, VG_(rankToRealRegNo)(r) );
+                        RealReg, VG_(rankToRealRegNum)(r) );
             VG_(uinstrs_spill)++;
             spill_reqd = True;
-            if (VG_(disassemble))
+            if (dis)
                VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          }
 
@@ -1869,19 +1899,18 @@
          and use patchUInstr to convert its rTempRegs into
          realregs. */
       for (j = 0; j < k; j++)
-         tempUse[j].realNo 
-            = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
+         realUse[j] = VG_(rankToRealRegNum)(temp_info[tempUse[j].num].real_no);
       VG_(copyUInstr)(c2, &c1->instrs[i]);
-      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
+      patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
 
-      if (VG_(disassemble)) {
+      if (dis) {
          VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
          VG_(printf)("\n");
       }
    }
 
    if (temp_info != NULL)
-      VG_(jitfree)(temp_info);
+      VG_(arena_free)(VG_AR_JITTER, temp_info);
 
    VG_(freeCodeBlock)(c1);
 
@@ -1893,1170 +1922,37 @@
 #  undef VG_NOTHING
 
 }
+extern void fooble(int);
+/* Analysis records liveness of all general-use RealRegs in the UCode. */
+static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
+{        
+   Int      i, j, k;
+   RRegSet  rregs_live;
+   RegUse   regUse[3];
+   UInstr*  u;
 
-
-/*------------------------------------------------------------*/
-/*--- New instrumentation machinery.                       ---*/
-/*------------------------------------------------------------*/
-
-static
-VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveOR4_TQ;
-      case 2: return VgT_ImproveOR2_TQ;
-      case 1: return VgT_ImproveOR1_TQ;
-      default: VG_(panic)("get_VgT_ImproveOR_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_ImproveAND4_TQ;
-      case 2: return VgT_ImproveAND2_TQ;
-      case 1: return VgT_ImproveAND1_TQ;
-      default: VG_(panic)("get_VgT_ImproveAND_TQ");
-   }
-}
-
-
-static
-VgTagOp get_VgT_Left ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_Left4;
-      case 2: return VgT_Left2;
-      case 1: return VgT_Left1;
-      default: VG_(panic)("get_VgT_Left");
-   }
-}
-
-
-static
-VgTagOp get_VgT_UifU ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_UifU4;
-      case 2: return VgT_UifU2;
-      case 1: return VgT_UifU1;
-      case 0: return VgT_UifU0;
-      default: VG_(panic)("get_VgT_UifU");
-   }
-}
-
-
-static
-VgTagOp get_VgT_DifD ( Int sz )
-{
-   switch (sz) {
-      case 4: return VgT_DifD4;
-      case 2: return VgT_DifD2;
-      case 1: return VgT_DifD1;
-      default: VG_(panic)("get_VgT_DifD");
-   }
-}
-
-
-static 
-VgTagOp get_VgT_PCast ( Int szs, Int szd )
-{
-   if (szs == 4 && szd == 0) return VgT_PCast40;
-   if (szs == 2 && szd == 0) return VgT_PCast20;
-   if (szs == 1 && szd == 0) return VgT_PCast10;
-   if (szs == 0 && szd == 1) return VgT_PCast01;
-   if (szs == 0 && szd == 2) return VgT_PCast02;
-   if (szs == 0 && szd == 4) return VgT_PCast04;
-   if (szs == 1 && szd == 4) return VgT_PCast14;
-   if (szs == 1 && szd == 2) return VgT_PCast12;
-   if (szs == 1 && szd == 1) return VgT_PCast11;
-   VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
-   VG_(panic)("get_VgT_PCast");
-}
-
-
-static 
-VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
-{
-   if (szs == 1 && szd == 2 && syned)  return VgT_SWiden12;
-   if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
-
-   if (szs == 1 && szd == 4 && syned)  return VgT_SWiden14;
-   if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
-
-   if (szs == 2 && szd == 4 && syned)  return VgT_SWiden24;
-   if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
-
-   VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
-   VG_(panic)("get_VgT_Widen");
-}
-
-/* Pessimally cast the spec'd shadow from one size to another. */
-static 
-void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
-{
-   if (szs == 0 && szd == 0)
-      return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_PCast(szs,szd));
-}
-
-
-/* Create a signed or unsigned widen of the spec'd shadow from one
-   size to another.  The only allowed size transitions are 1->2, 1->4
-   and 2->4. */
-static 
-void create_Widen ( UCodeBlock* cb, Bool signed_widen,
-                    Int szs, Int szd, Int tempreg )
-{
-   if (szs == szd) return;
-   uInstr3(cb, TAG1, 0, TempReg, tempreg, 
-                        NoValue, 0, 
-                        Lit16,   get_VgT_Widen(signed_widen,szs,szd));
-}
-
-
-/* Get the condition codes into a new shadow, at the given size. */
-static
-Int create_GETVF ( UCodeBlock* cb, Int sz )
-{
-   Int tt = newShadow(cb);
-   uInstr1(cb, GETVF, 0, TempReg, tt);
-   create_PCast(cb, 0, sz, tt);
-   return tt;
-}
-
-
-/* Save the condition codes from the spec'd shadow. */
-static
-void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   if (sz == 0) {
-      uInstr1(cb, PUTVF, 0, TempReg, tempreg);
-   } else { 
-      Int tt = newShadow(cb);
-      uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
-      create_PCast(cb, sz, 0, tt);
-      uInstr1(cb, PUTVF, 0, TempReg, tt);
-   }
-}
-
-
-/* Do Left on the spec'd shadow. */
-static 
-void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
-{
-   uInstr3(cb, TAG1, 0, 
-               TempReg, tempreg,
-               NoValue, 0, 
-               Lit16, get_VgT_Left(sz));
-}
-
-
-/* Do UifU on ts and td, putting the result in td. */
-static 
-void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_UifU(sz));
-}
-
-
-/* Do DifD on ts and td, putting the result in td. */
-static 
-void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
-{
-   uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
-               Lit16, get_VgT_DifD(sz));
-}
-
-
-/* Do HelpAND on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveAND_TQ(sz));
-}
-
-
-/* Do HelpOR on value tval and tag tqqq, putting the result in
-   tqqq. */
-static 
-void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
-{
-   uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
-               Lit16, get_VgT_ImproveOR_TQ(sz));
-}
-
-
-/* Get the shadow for an operand described by (tag, val).  Emit code
-   to do this and return the identity of the shadow holding the
-   result.  The result tag is always copied into a new shadow, so it
-   can be modified without trashing the original.*/
-static
-Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, 
-                                     Int sz, Int tag, Int val )
-{
-   Int sh;
-   sh = newShadow(cb);
-   if (tag == TempReg) {
-      uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
-      return sh;
-   }
-   if (tag == Literal) {
-      uInstr1(cb, SETV, sz, TempReg, sh);
-      return sh;
-   }
-   if (tag == ArchReg) {
-      uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
-      return sh;
-   }
-   VG_(panic)("getOperandShadow");
-}
-
-
-
-/* Create and return an instrumented version of cb_in.  Free cb_in
-   before returning. */
-static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
-{
-   UCodeBlock* cb;
-   Int         i, j;
-   UInstr*     u_in;
-   Int         qs, qd, qt, qtt;
-   cb = VG_(allocCodeBlock)();
-   cb->nextTemp = cb_in->nextTemp;
-
-   for (i = 0; i < cb_in->used; i++) {
-      qs = qd = qt = qtt = INVALID_TEMPREG;
-      u_in = &cb_in->instrs[i];
-
-      /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
-
-      /* VG_(ppUInstr)(0, u_in); */
-      switch (u_in->opcode) {
-
-         case NOP:
-            break;
-
-         case INCEIP:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Loads and stores.  Test the V bits for the address.  24
-            Mar 02: since the address is A-checked anyway, there's not
-            really much point in doing the V-check too, unless you
-            think that you might use addresses which are undefined but
-            still addressible.  Hence the optionalisation of the V
-            check.
-
-            The LOADV/STOREV does an addressibility check for the
-            address. */
-
-         case LOAD: 
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            }
-            uInstr2(cb, LOADV, u_in->size, 
-                        TempReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case STORE:
-            if (VG_(clo_check_addrVs)) {
-               uInstr1(cb, TESTV,  4, TempReg, SHADOW(u_in->val2));
-               uInstr1(cb, SETV,   4, TempReg, SHADOW(u_in->val2));
-            }
-            uInstr2(cb, STOREV, u_in->size,
-                        TempReg, SHADOW(u_in->val1), 
-                        TempReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Moving stuff around.  Make the V bits follow accordingly,
-            but don't do anything else.  */
-
-         case GET:
-            uInstr2(cb, GETV, u_in->size,
-                        ArchReg, u_in->val1,
-                        TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUT:
-            uInstr2(cb, PUTV, u_in->size, 
-                        TempReg, SHADOW(u_in->val1),
-                        ArchReg, u_in->val2);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case GETF:
-            /* This is not the smartest way to do it, but should work. */
-            qd = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         case PUTF:
-            create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case MOV:
-            switch (u_in->tag1) {
-               case TempReg: 
-                  uInstr2(cb, MOV, 4,
-                              TempReg, SHADOW(u_in->val1),
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               case Literal: 
-                  uInstr1(cb, SETV, u_in->size, 
-                              TempReg, SHADOW(u_in->val2));
-                  break;
-               default: 
-                  VG_(panic)("vg_instrument: MOV");
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Special case of add, where one of the operands is a literal.
-            lea1(t) = t + some literal.
-            Therefore: lea1#(qa) = left(qa) 
-         */
-         case LEA1:
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Another form of add.  
-            lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
-                                and is 0,1,2 or 3.
-            lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
-            Note, subtly, that the shift puts zeroes at the bottom of qt,
-            meaning Valid, since the corresponding shift of tt puts 
-            zeroes at the bottom of tb.
-         */
-         case LEA2: {
-            Int shift;
-            vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
-            switch (u_in->extra4b) {
-               case 1: shift = 0; break;
-               case 2: shift = 1; break;
-               case 4: shift = 2; break;
-               case 8: shift = 3; break;
-               default: VG_(panic)( "vg_instrument(LEA2)" );
-            }
-            qs = SHADOW(u_in->val1);
-            qt = SHADOW(u_in->val2);
-            qd = SHADOW(u_in->val3);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
-            if (shift > 0) {
-               uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
-               uLiteral(cb, shift);
-            }
-            create_UifU(cb, 4, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
-         case INC: case DEC:
-            qd = SHADOW(u_in->val1);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->flags_w != FlagsEmpty)
-               create_PUTVF(cb, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* This is a HACK (approximation :-) */
-         /* rcl#/rcr#(qs,qd) 
-               = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
-                 eflags# = q0
-                 qd =pcast-0-sz(q0)
-            Ie, cast everything down to a single bit, then back up.
-            This assumes that any bad bits infect the whole word and 
-            the eflags.
-         */
-         case RCL: case RCR:
-	    vg_assert(u_in->flags_r != FlagsEmpty);
-            /* The following assertion looks like it makes sense, but is
-               actually wrong.  Consider this:
-                  rcll    %eax
-                  imull   %eax, %eax
-               The rcll writes O and C but so does the imull, so the O and C 
-               write of the rcll is annulled by the prior improvement pass.
-               Noticed by Kevin Ryde <user42@zip.com.au>
-            */
-	    /* vg_assert(u_in->flags_w != FlagsEmpty); */
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            /* We can safely modify qs; cast it to 0-size. */
-            create_PCast(cb, u_in->size, 0, qs);
-            qd = SHADOW(u_in->val2);
-            create_PCast(cb, u_in->size, 0, qd);
-            /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
-            create_UifU(cb, 0, qs, qd);
-            /* qs is now free; reuse it for the flag definedness. */
-            qs = create_GETVF(cb, 0);
-            create_UifU(cb, 0, qs, qd);
-            create_PUTVF(cb, 0, qd);
-            create_PCast(cb, 0, u_in->size, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* for OP in shl shr sar rol ror
-            (qs is shift count#, qd is value to be OP#d)
-            OP(ts,td)
-            OP#(qs,qd)
-               = pcast-1-sz(qs) `UifU` OP(ts,qd)
-            So we apply OP to the tag bits too, and then UifU with
-            the shift count# to take account of the possibility of it
-            being undefined.
+   /* All regs are dead at the end of the block */
+   rregs_live = ALL_RREGS_DEAD;
             
-            A bit subtle:
-               ROL/ROR rearrange the tag bits as per the value bits.
-               SHL/SHR shifts zeroes into the value, and corresponding 
-                  zeroes indicating Definedness into the tag.
-               SAR copies the top bit of the value downwards, and therefore
-                  SAR also copies the definedness of the top bit too.
-            So in all five cases, we just apply the same op to the tag 
-            bits as is applied to the value bits.  Neat!
-         */
-         case SHL:
-         case SHR: case SAR:
-         case ROL: case ROR: {
-            Int t_amount = INVALID_TEMPREG;
-            vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-
-            /* Make qs hold shift-count# and make
-               t_amount be a TempReg holding the shift count. */
-            if (u_in->tag1 == Literal) {
-               t_amount = newTemp(cb);
-               uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
-               uLiteral(cb, u_in->lit32);
-               qs = SHADOW(t_amount);
-               uInstr1(cb, SETV, 1, TempReg, qs);
-            } else {
-               t_amount = u_in->val1;
-               qs = SHADOW(u_in->val1);
-            }
-
-            uInstr2(cb, u_in->opcode, 
-                        u_in->size, 
-                        TempReg, t_amount, 
-                        TempReg, qd);
-            qt = newShadow(cb);
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            create_PCast(cb, 1, u_in->size, qt);
-            create_UifU(cb, u_in->size, qt, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-
-         /* One simple tag operation. */
-         case WIDEN:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, 
-                             SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* not#(x) = x (since bitwise independent) */
-         case NOT:
-            vg_assert(u_in->tag1 == TempReg);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* neg#(x) = left(x) (derivable from case for SUB) */
-         case NEG:
-            vg_assert(u_in->tag1 == TempReg);
-            create_Left(cb, u_in->size, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* bswap#(x) = bswap(x) */
-         case BSWAP:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->size == 4);
-            qd = SHADOW(u_in->val1);
-            uInstr1(cb, BSWAP, 4, TempReg, qd);
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cc2val#(qd) = pcast-0-to-size(eflags#) */
-         case CC2VAL:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            qt = create_GETVF(cb, u_in->size);
-            uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* cmov#(qs,qd) = cmov(qs,qd)
-            That is, do the cmov of tags using the same flags as for
-            the data (obviously).  However, first do a test on the 
-            validity of the flags.
-         */
-         case CMOV:
-            vg_assert(u_in->size == 4);
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            vg_assert(u_in->flags_r != FlagsEmpty);
-            vg_assert(u_in->flags_w == FlagsEmpty);
-            qs = SHADOW(u_in->val1);
-            qd = SHADOW(u_in->val2);
-            qt = create_GETVF(cb, 0);
-            uInstr1(cb, TESTV, 0, TempReg, qt);
-            /* qt should never be referred to again.  Nevertheless
-               ... */
-            uInstr1(cb, SETV, 0, TempReg, qt);
-
-            uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
-            LAST_UINSTR(cb).cond    = u_in->cond;
-            LAST_UINSTR(cb).flags_r = u_in->flags_r;
-
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* add#/sub#(qs,qd) 
-               = qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
-               = left(qs) `UifU` left(qd)
-               = left(qs `UifU` qd)
-            adc#/sbb#(qs,qd)
-               = left(qs `UifU` qd) `UifU` pcast(eflags#)
-            Second arg (dest) is TempReg.
-            First arg (src) is Literal or TempReg or ArchReg. 
-         */
-         case ADD: case SUB:
-         case ADC: case SBB:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            create_Left(cb, u_in->size, qd);
-            if (u_in->opcode == ADC || u_in->opcode == SBB) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, u_in->size);
-               create_UifU(cb, u_in->size, qt, qd);
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* xor#(qs,qd) = qs `UifU` qd */
-         case XOR:
-            qd = SHADOW(u_in->val2);
-            qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
-            create_UifU(cb, u_in->size, qs, qd);
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* and#/or#(qs,qd) 
-               = (qs `UifU` qd) `DifD` improve(vs,qs) 
-                                `DifD` improve(vd,qd)
-            where improve is the relevant one of
-                Improve{AND,OR}_TQ
-            Use the following steps, with qt as a temp:
-               qt = improve(vd,qd)
-               qd = qs `UifU` qd
-               qd = qt `DifD` qd
-               qt = improve(vs,qs)
-               qd = qt `DifD` qd
-         */
-         case AND: case OR:
-            vg_assert(u_in->tag1 == TempReg);
-            vg_assert(u_in->tag2 == TempReg);
-            qd = SHADOW(u_in->val2);
-            qs = SHADOW(u_in->val1);
-            qt = newShadow(cb);
-
-            /* qt = improve(vd,qd) */
-            uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
-            /* qd = qs `UifU` qd */
-            create_UifU(cb, u_in->size, qs, qd);
-            /* qd = qt `DifD` qd */
-            create_DifD(cb, u_in->size, qt, qd);
-            /* qt = improve(vs,qs) */
-            uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
-            if (u_in->opcode == AND)
-               create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
-            else
-               create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
-            /* qd = qt `DifD` qd */
-               create_DifD(cb, u_in->size, qt, qd);
-            /* So, finally qd is the result tag. */
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, u_in->size, qd);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Machinery to do with supporting CALLM.  Copy the start and
-            end markers only to make the result easier to read
-            (debug); they generate no code and have no effect. 
-         */
-         case CALLM_S: case CALLM_E:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Copy PUSH and POP verbatim.  Arg/result absval
-            calculations are done when the associated CALL is
-            processed.  CLEAR has no effect on absval calculations but
-            needs to be copied.  
-         */
-         case PUSH: case POP: case CLEAR:
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* In short:
-               callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
-            We have to decide on a size to do the computation at,
-            although the choice doesn't affect correctness.  We will
-            do a pcast to the final size anyway, so the only important
-            factor is to choose a size which minimises the total
-            number of casts needed.  Valgrind: just use size 0,
-            regardless.  It may not be very good for performance
-            but does simplify matters, mainly by reducing the number
-            of different pessimising casts which have to be implemented.
-         */
-         case CALLM: {
-            UInstr* uu;
-            Bool res_used;
-
-            /* Now generate the code.  Get the final result absval
-               into qt. */
-            qt  = newShadow(cb);
-            qtt = newShadow(cb);
-            uInstr1(cb, SETV, 0, TempReg, qt);
-            for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != PUSH) continue;
-               /* cast via a temporary */
-               uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
-                                   TempReg, qtt);
-               create_PCast(cb, uu->size, 0, qtt);
-               create_UifU(cb, 0, qtt, qt);
-            }
-            /* Remembering also that flags read count as inputs. */
-            if (u_in->flags_r != FlagsEmpty) {
-               qtt = create_GETVF(cb, 0);
-               create_UifU(cb, 0, qtt, qt);
-            }
-
-            /* qt now holds the result tag.  If any results from the
-               call are used, either by fetching with POP or
-               implicitly by writing the flags, we copy the result
-               absval to the relevant location.  If not used, the call
-               must have been for its side effects, so we test qt here
-               and now.  Note that this assumes that all values
-               removed by POP continue to be live.  So dead args
-               *must* be removed with CLEAR, not by POPping them into
-               a dummy tempreg. 
-            */
-            res_used = False;
-            for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
-               uu = & cb_in->instrs[j];
-               if (uu->opcode != POP) continue;
-               /* Cast via a temp. */
-               uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
-               create_PCast(cb, 0, uu->size, qtt);
-               uInstr2(cb, MOV, 4, TempReg, qtt, 
-                                   TempReg, SHADOW(uu->val1));
-               res_used = True;
-            }
-            if (u_in->flags_w != FlagsEmpty) {
-               create_PUTVF(cb, 0, qt);
-               res_used = True;
-            }
-            if (!res_used) {
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-         }
-         /* Whew ... */
-
-         case JMP:
-            if (u_in->tag1 == TempReg) {
-               uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-               uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            } else {
-               vg_assert(u_in->tag1 == Literal);
-            }
-            if (u_in->cond != CondAlways) {
-               vg_assert(u_in->flags_r != FlagsEmpty);
-               qt = create_GETVF(cb, 0);
-               uInstr1(cb, TESTV, 0, TempReg, qt);
-               /* qt should never be referred to again.  Nevertheless
-                  ... */
-               uInstr1(cb, SETV, 0, TempReg, qt);
-            }
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         case JIFZ:
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val1));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* Emit a check on the address used.  For FPU_R, the value
-            loaded into the FPU is checked at the time it is read from
-            memory (see synth_fpu_mem_check_actions).  */
-         case FPU_R: case FPU_W:
-            vg_assert(u_in->tag2 == TempReg);
-            uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
-            uInstr1(cb, SETV,  4, TempReg, SHADOW(u_in->val2));
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         /* For FPU insns not referencing memory, just copy thru. */
-         case FPU: 
-            VG_(copyUInstr)(cb, u_in);
-            break;
-
-         default:
-            VG_(ppUInstr)(0, u_in);
-            VG_(panic)( "vg_instrument: unhandled case");
-
-      } /* end of switch (u_in->opcode) */
-
-   } /* end of for loop */
-
-   VG_(freeCodeBlock)(cb_in);
-   return cb;
-}
-
-/*------------------------------------------------------------*/
-/*--- Clean up mem check instrumentation.                  ---*/
-/*------------------------------------------------------------*/
-
-#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1)
-#define VGC_UNDEF ((UChar)100)
-#define VGC_VALUE ((UChar)101)
-
-#define NOP_no_msg(uu)                                         \
-   do { uu->opcode = NOP; } while (False)
-
-#define NOP_tag1_op(uu)                                        \
-   do { uu->opcode = NOP;                                      \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: delete %s due to defd arg\n",   \
-                       i, VG_(nameOfTagOp(u->val3)));          \
-   } while (False)
-
-#define SETV_tag1_op(uu,newsz)                                 \
-   do { uu->opcode = SETV;                                     \
-        uu->size = newsz;                                      \
-        uu->tag2 = uu->tag3 = NoValue;                         \
-        if (VG_(disassemble))                                  \
-           VG_(printf)("at %d: convert %s to SETV%d "          \
-                       "due to defd arg\n",                    \
-                       i, VG_(nameOfTagOp(u->val3)), newsz);   \
-   } while (False)
-
-
-
-/* Run backwards and delete SETVs on shadow temps for which the next
-   action is a write.  Needs an env saying whether or not the next
-   action is a write.  The supplied UCodeBlock is destructively
-   modified.
-*/
-static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
-{
-   Bool*   next_is_write;
-   Int     i, j, k, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
-
-   for (i = 0; i < n_temps; i++) next_is_write[i] = True;
-
    for (i = cb->used-1; i >= 0; i--) {
       u = &cb->instrs[i];
 
-      /* If we're not checking address V bits, there will be a lot of
-         GETVs, TAG1s and TAG2s calculating values which are never
-         used.  These first three cases get rid of them. */
+      u->regs_live_after = rregs_live;
 
-      if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete GETV\n", i);
-      } else
+      k = VG_(getRegUsage)(u, RealReg, regUse);
 
-      if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) 
-                            && next_is_write[u->val1]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG1\n", i);
-      } else
-
-      if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) 
-                            && next_is_write[u->val2]
-                            && !VG_(clo_check_addrVs)) {
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete TAG2\n", i);
-      } else
-
-      /* We do the rest of these regardless of whether or not
-         addresses are V-checked. */
-
-      if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) 
-                           && next_is_write[u->val2]) {
-         /* This MOV is pointless because the target is dead at this
-            point.  Delete it. */
-         u->opcode = NOP;
-         u->size = 0;
-         if (VG_(disassemble)) 
-            VG_(printf)("at %d: delete MOV\n", i);
-      } else
-
-      if (u->opcode == SETV) {
-         if (u->tag1 == TempReg) {
-            vg_assert(VGC_IS_SHADOW(u->val1));
-            if (next_is_write[u->val1]) {
-               /* This write is pointless, so annul it. */
-               u->opcode = NOP;
-               u->size = 0;
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete SETV\n", i);
-            } else {
-               /* This write has a purpose; don't annul it, but do
-                  notice that we did it. */
-               next_is_write[u->val1] = True;
-            }
-              
-         }
-
-      } else {
-         /* Find out what this insn does to the temps. */
-         k = getTempUsage(u, &tempUse[0]);
-         vg_assert(k <= 3);
-         for (j = k-1; j >= 0; j--) {
-            next_is_write[ tempUse[j].tempNo ]
-                         = tempUse[j].isWrite;
-         }
-      }
-
-   }
-
-   VG_(jitfree)(next_is_write);
-}
-
-
-/* Run forwards, propagating and using the is-completely-defined
-   property.  This removes a lot of redundant tag-munging code.
-   Unfortunately it requires intimate knowledge of how each uinstr and
-   tagop modifies its arguments.  This duplicates knowledge of uinstr
-   tempreg uses embodied in getTempUsage(), which is unfortunate. 
-   The supplied UCodeBlock* is modified in-place.
-
-   For each value temp, def[] should hold VGC_VALUE.
-
-   For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is
-   definitely known to be fully defined at that size.  In all other
-   circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
-   undefined.  In cases of doubt, VGC_UNDEF is always safe.
-*/
-static void vg_propagate_definedness ( UCodeBlock* cb )
-{
-   UChar*  def;
-   Int     i, j, k, t, n_temps;
-   UInstr* u;
-   TempUse tempUse[3];
-
-   n_temps = cb->nextTemp;
-   if (n_temps == 0) return;
-
-   def = VG_(jitmalloc)(n_temps * sizeof(UChar));
-   for (i = 0; i < n_temps; i++) 
-      def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
-
-   /* Run forwards, detecting and using the all-defined property. */
-
-   for (i = 0; i < cb->used; i++) {
-      u = &cb->instrs[i];
-      switch (u->opcode) {
-
-      /* Tag-handling uinstrs. */
-
-         /* Deal with these quickly. */
-         case NOP:
-         case INCEIP:
-            break;
-
-         /* Make a tag defined. */
-         case SETV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            def[u->val1] = u->size;
-            break;
-
-         /* Check definedness of a tag. */
-         case TESTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) { 
-               vg_assert(def[u->val1] == u->size); 
-               NOP_no_msg(u);
-               if (VG_(disassemble)) 
-                  VG_(printf)("at %d: delete TESTV on defd arg\n", i);
-            }
-            break;
-
-         /* Applies to both values and tags.  Propagate Definedness
-            property through copies.  Note that this isn't optional;
-            we *have* to do this to keep def[] correct. */
-         case MOV:
-            vg_assert(u->tag2 == TempReg);
-            if (u->tag1 == TempReg) {
-               if (VGC_IS_SHADOW(u->val1)) {
-                  vg_assert(VGC_IS_SHADOW(u->val2));
-                  def[u->val2] = def[u->val1];
-               }
-            }
-            break;
-
-         case PUTV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(PUTV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into PUTV\n", i);
-            }
-            break;
-
-         case STOREV:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] <= 4) {
-               vg_assert(def[u->val1] == u->size);
-               u->tag1 = Literal;
-               u->val1 = 0;
-               switch (u->size) {
-                  case 4: u->lit32 = 0x00000000; break;
-                  case 2: u->lit32 = 0xFFFF0000; break;
-                  case 1: u->lit32 = 0xFFFFFF00; break;
-                  default: VG_(panic)("vg_cleanup(STOREV)");
-               }
-               if (VG_(disassemble)) 
-                  VG_(printf)(
-                     "at %d: propagate definedness into STandV\n", i);
-            }
-            break;
-
-         /* Nothing interesting we can do with this, I think. */
-         case PUTVF:
-            break;
-
-         /* Tag handling operations. */
-         case TAG2:
-            vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-            vg_assert(u->tag3 == Lit16);
-            /* Ultra-paranoid "type" checking. */
-            switch (u->val3) {
-               case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
-               case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
-               case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
-                  vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
-                  break;
-               default:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  break;
-            }
-            switch (u->val3) {
-               Int sz;
-               case VgT_UifU4: 
-                  sz = 4; goto do_UifU;
-               case VgT_UifU2: 
-                  sz = 2; goto do_UifU;
-               case VgT_UifU1:
-                  sz = 1; goto do_UifU;
-               case VgT_UifU0:
-                  sz = 0; goto do_UifU;
-               do_UifU:
-                  vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-                  vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
-                  if (def[u->val1] <= 4) {
-                     /* UifU.  The first arg is defined, so result is
-                        simply second arg.  Delete this operation. */
-                     vg_assert(def[u->val1] == sz);
-                     NOP_no_msg(u);
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: delete UifU%d due to defd arg1\n", 
-                           i, sz);
-                  }
-                  else 
-                  if (def[u->val2] <= 4) {
-                     /* UifU.  The second arg is defined, so result is
-                        simply first arg.  Copy to second. */
-                     vg_assert(def[u->val2] == sz);
-                     u->opcode = MOV; 
-                     u->size = 4;
-                     u->tag3 = NoValue;
-                     def[u->val2] = def[u->val1];
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                           "at %d: change UifU%d to MOV due to defd"
-                           " arg2\n", 
-                           i, sz);
-                  }
-                  break;
-               case VgT_ImproveAND4_TQ:
-                  sz = 4; goto do_ImproveAND;
-               case VgT_ImproveAND1_TQ:
-                  sz = 1; goto do_ImproveAND;
-               do_ImproveAND:
-                  /* Implements Q = T OR Q.  So if Q is entirely defined,
-                     ie all 0s, we get MOV T, Q. */
-		  if (def[u->val2] <= 4) {
-                     vg_assert(def[u->val2] == sz);
-                     u->size = 4; /* Regardless of sz */
-                     u->opcode = MOV;
-                     u->tag3 = NoValue;
-                     def[u->val2] = VGC_UNDEF;
-                     if (VG_(disassemble)) 
-                        VG_(printf)(
-                            "at %d: change ImproveAND%d_TQ to MOV due "
-                            "to defd arg2\n", 
-                            i, sz);
-                  }
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            break;
-
-         case TAG1:
-            vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
-            if (def[u->val1] > 4) break;
-            /* We now know that the arg to the op is entirely defined.
-               If the op changes the size of the arg, we must replace
-               it with a SETV at the new size.  If it doesn't change
-               the size, we can delete it completely. */
-            switch (u->val3) {
-               /* Maintain the same size ... */
-               case VgT_Left4: 
-                  vg_assert(def[u->val1] == 4);
-                  NOP_tag1_op(u);
-                  break;
-               case VgT_PCast11: 
-                  vg_assert(def[u->val1] == 1);
-                  NOP_tag1_op(u);
-                  break;
-               /* Change size ... */
-               case VgT_PCast40: 
-                  vg_assert(def[u->val1] == 4);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast14: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,4);
-                  def[u->val1] = 4;
-                  break;
-               case VgT_PCast12: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               case VgT_PCast10: 
-                  vg_assert(def[u->val1] == 1);
-                  SETV_tag1_op(u,0);
-                  def[u->val1] = 0;
-                  break;
-               case VgT_PCast02: 
-                  vg_assert(def[u->val1] == 0);
-                  SETV_tag1_op(u,2);
-                  def[u->val1] = 2;
-                  break;
-               default: 
-                  goto unhandled;
-            }
-            if (VG_(disassemble)) 
-               VG_(printf)(
-                  "at %d: delete TAG1 %s due to defd arg\n",
-                  i, VG_(nameOfTagOp(u->val3)));
-            break;
-
-         default:
-         unhandled:
-            /* We don't know how to handle this uinstr.  Be safe, and 
-               set to VGC_VALUE or VGC_UNDEF all temps written by it. */
-            k = getTempUsage(u, &tempUse[0]);
-            vg_assert(k <= 3);
-            for (j = 0; j < k; j++) {
-               t = tempUse[j].tempNo;
-               vg_assert(t >= 0 && t < n_temps);
-               if (!tempUse[j].isWrite) {
-                  /* t is read; ignore it. */
-                  if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
-                     VG_(printf)("ignoring def %d at %s %s\n", 
-                                 def[t], 
-                                 VG_(nameUOpcode)(True, u->opcode),
-                                 (u->opcode == TAG1 || u->opcode == TAG2)
-                                    ? VG_(nameOfTagOp)(u->val3) 
-                                    : (Char*)"");
-               } else {
-                  /* t is written; better nullify it. */
-                  def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
-               }
-            }
+      /* For each reg usage ... bwds in program order.  Variable is live
+         before this UInstr if it is read by this UInstr.
+         Note that regUse[j].num holds the Intel reg number, so we must
+         convert it to our rank number.  */
+      for (j = k-1; j >= 0; j--) {
+         SET_RREG_LIVENESS ( VG_(realRegNumToRank)(regUse[j].num),
+                             rregs_live,
+                             !regUse[j].isWrite );
       }
    }
-
-   VG_(jitfree)(def);
 }
 
-
-/* Top level post-instrumentation cleanup function. */
-static void vg_cleanup ( UCodeBlock* cb )
-{
-   vg_propagate_definedness ( cb );
-   vg_delete_redundant_SETVs ( cb );
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Main entry point for the JITter.                     ---*/
 /*------------------------------------------------------------*/
@@ -3068,13 +1964,14 @@
    this call is being done for debugging purposes, in which case (a)
    throw away the translation once it is made, and (b) produce a load
    of debugging output. 
+
+   'tst' is the identity of the thread needing this block.
 */
-void VG_(translate) ( ThreadState* tst, 
-                         /* Identity of thread needing this block */
-                      Addr  orig_addr,
-                      UInt* orig_size,
-                      Addr* trans_addr,
-                      UInt* trans_size )
+void VG_(translate) ( /*IN*/  ThreadState* tst, 
+		      /*IN*/  Addr  orig_addr,  
+                      /*OUT*/ UInt* orig_size,
+                      /*OUT*/ Addr* trans_addr, 
+                      /*OUT*/ UInt* trans_size )
 {
    Int         n_disassembled_bytes, final_code_size;
    Bool        debugging_translation;
@@ -3085,109 +1982,82 @@
    debugging_translation
       = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
 
-   dis = True;
-   dis = debugging_translation;
+   if (!debugging_translation)
+      VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
 
-   /* Check if we're being asked to jump to a silly address, and if so
-      record an error message before potentially crashing the entire
-      system. */
-   if (VG_(clo_instrument) && !debugging_translation && !dis) {
-      Addr bad_addr;
-      Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
-      if (!ok) {
-         VG_(record_jump_error)(tst, bad_addr);
-      }
-   }
-
-   /* if (VG_(overall_in_count) >= 4800) dis=True; */
-   if (VG_(disassemble))
-      VG_(printf)("\n");
-   if (0 || dis 
-       || (VG_(overall_in_count) > 0 &&
-           (VG_(overall_in_count) % 1000 == 0))) {
-      if (0&& (VG_(clo_verbosity) > 1 || dis))
-         VG_(message)(Vg_UserMsg,
-              "trans# %d, bb# %lu, in %d, out %d",
-              VG_(overall_in_count), 
-              VG_(bbs_done),
-              VG_(overall_in_osize), VG_(overall_in_tsize),
-              orig_addr );
-   }
    cb = VG_(allocCodeBlock)();
 
+   /* If doing any code printing, print a basic block start marker */
+   if (VG_(clo_trace_codegen)) {
+      Char fnname[64] = "";
+      VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
+      VG_(printf)(
+              "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %lu ====\n\n",
+              VG_(overall_in_count), fnname, orig_addr, 
+              VG_(overall_in_osize), VG_(overall_in_tsize),
+              VG_(bbs_done));
+   }
+
+   /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
+#  define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
+      ( debugging_translation || (VG_(clo_trace_codegen) & (1 << (n-1))) )
+
    /* Disassemble this basic block into cb. */
-   /* VGP_PUSHCC(VgpToUCode); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
+   VGP_PUSHCC(VgpToUCode);
    n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
-   /* VGP_POPCC; */
-   /* dis=True; */
-   /* if (0&& VG_(translations_done) < 617)  */
-   /*    dis=False; */
+   VGP_POPCC(VgpToUCode);
+
    /* Try and improve the code a bit. */
    if (VG_(clo_optimise)) {
-      /* VGP_PUSHCC(VgpImprove); */
+      VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
+      VGP_PUSHCC(VgpImprove);
       vg_improve ( cb );
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Improved code:" );
-      /* VGP_POPCC; */
-   }
-   /* dis=False; */
-   /* Add instrumentation code. */
-   if (VG_(clo_instrument)) {
-      /* VGP_PUSHCC(VgpInstrument); */
-      cb = vg_instrument(cb);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
-      if (VG_(clo_cleanup)) {
-         /* VGP_PUSHCC(VgpCleanup); */
-         vg_cleanup(cb);
-         /* VGP_POPCC; */
-         if (VG_(disassemble)) 
-            VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
-      }
+      VGP_POPCC(VgpImprove);
    }
 
-   //VG_(disassemble) = True;
+   /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
+      SK_(instrument) looks at it. */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
+   VGP_PUSHCC(VgpInstrument);
+   cb = SK_(instrument) ( cb, orig_addr );
+   if (VG_(print_codegen))
+      VG_(ppUCodeBlock) ( cb, "Instrumented UCode:" );
+   VG_(saneUCodeBlock)( cb );
+   VGP_POPCC(VgpInstrument);
 
-   /* Add cache simulation code. */
-   if (VG_(clo_cachesim)) {
-      /* VGP_PUSHCC(VgpCacheInstrument); */
-      cb = VG_(cachesim_instrument)(cb, orig_addr);
-      /* VGP_POPCC; */
-      if (VG_(disassemble)) 
-         VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
-   }
-   
-   //VG_(disassemble) = False;
-   
    /* Allocate registers. */
-   /* VGP_PUSHCC(VgpRegAlloc); */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
+   VGP_PUSHCC(VgpRegAlloc);
    cb = vg_do_register_allocation ( cb );
-   /* VGP_POPCC; */
-   /* dis=False; */
-   /* 
-   if (VG_(disassemble))
-      VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
-   */
+   VGP_POPCC(VgpRegAlloc);
 
-   /* VGP_PUSHCC(VgpFromUcode); */
-   /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
-      and so must be VG_(jitfree)'d. */
+   /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
+    * anything;  results can be seen when emitting final code). */
+   VGP_PUSHCC(VgpLiveness);
+   vg_realreg_liveness_analysis ( cb );
+   VGP_POPCC(VgpLiveness);
+
+   /* Emit final code */
+   VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
+
+   VGP_PUSHCC(VgpFromUcode);
    final_code = VG_(emit_code)(cb, &final_code_size );
-   /* VGP_POPCC; */
+   VGP_POPCC(VgpFromUcode);
    VG_(freeCodeBlock)(cb);
 
+#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
+
    if (debugging_translation) {
       /* Only done for debugging -- throw away final result. */
-      VG_(jitfree)(final_code);
+      VG_(arena_free)(VG_AR_JITTER, final_code);
    } else {
       /* Doing it for real -- return values to caller. */
-      //VG_(printf)("%d %d\n", n_disassembled_bytes, final_code_size);
       *orig_size = n_disassembled_bytes;
       *trans_addr = (Addr)final_code;
       *trans_size = final_code_size;
    }
-   VGP_POPCC;
+   VGP_POPCC(VgpTranslate);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/vg_transtab.c b/vg_transtab.c
index a6e15b3..09e8fa2 100644
--- a/vg_transtab.c
+++ b/vg_transtab.c
@@ -26,11 +26,10 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 #include "vg_include.h"
-#include "vg_constants.h"
 
 /* #define DEBUG_TRANSTAB */
 
@@ -319,14 +318,13 @@
          vg_tt_used, vg_tc_used / 1000
       );
 
-   /* Reconstruct the SMC detection structures. */
 #  ifdef DEBUG_TRANSTAB
    for (i = 0; i < VG_TT_SIZE; i++)
       vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED);
 #  endif
    VG_(sanity_check_tc_tt)();
 
-   VGP_POPCC;
+   VGP_POPCC(VgpDoLRU);
 }
 
 
@@ -460,7 +458,7 @@
    if (tte == NULL) {
       /* We didn't find it.  vg_run_innerloop will have to request a
          translation. */
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return (Addr)0;
    } else {
       /* Found it.  Put the search result into the fast cache now.
@@ -469,7 +467,7 @@
       VG_(tt_fast)[cno] = (Addr)tte;
       VG_(tt_fast_misses)++;
       tte->mru_epoch = VG_(current_epoch);
-      VGP_POPCC;
+      VGP_POPCC(VgpSlowFindT);
       return tte->trans_addr;
    }
 }
@@ -498,8 +496,11 @@
       o_end = o_start + vg_tt[i].orig_size - 1;
       if (o_end < i_start || o_start > i_end)
          continue;
-      if (VG_(clo_cachesim))
-         VG_(cachesim_notify_discard)( & vg_tt[i] );
+
+      if (VG_(needs).basic_block_discards)
+         SK_(discard_basic_block_info)( vg_tt[i].orig_addr, 
+                                         vg_tt[i].orig_size );
+
       vg_tt[i].orig_addr = VG_TTE_DELETED;
       VG_(this_epoch_out_count) ++;
       VG_(this_epoch_out_osize) += vg_tt[i].orig_size;
diff --git a/vg_unsafe.h b/vg_unsafe.h
index 0f72646..0862e0e 100644
--- a/vg_unsafe.h
+++ b/vg_unsafe.h
@@ -27,7 +27,7 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 
diff --git a/vg_valgrinq_dummy.c b/vg_valgrinq_dummy.c
index a0b1441..332085a 100644
--- a/vg_valgrinq_dummy.c
+++ b/vg_valgrinq_dummy.c
@@ -26,11 +26,11 @@
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307, USA.
 
-   The GNU General Public License is contained in the file LICENSE.
+   The GNU General Public License is contained in the file COPYING.
 */
 
 /* For the rationale behind this file, look at
-   VG_(mash_LD_PRELOAD_string) in vg_main.c. */
+   VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) in vg_main.c. */
 
 /* Remember not to use a variable of this name in any program you want
    to debug :-) */
diff --git a/vg_vtagops.c b/vg_vtagops.c
deleted file mode 100644
index 6e9f41f..0000000
--- a/vg_vtagops.c
+++ /dev/null
@@ -1,95 +0,0 @@
-
-/*--------------------------------------------------------------------*/
-/*--- Supporting routines for v-tag operations.                    ---*/
-/*---                                                 vg_vtagops.c ---*/
-/*--------------------------------------------------------------------*/
-
-/*
-   This file is part of Valgrind, an x86 protected-mode emulator 
-   designed for debugging and profiling binaries on x86-Unixes.
-
-   Copyright (C) 2000-2002 Julian Seward 
-      jseward@acm.org
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307, USA.
-
-   The GNU General Public License is contained in the file LICENSE.
-*/
-
-#include "vg_include.h"
-#include "vg_constants.h"
-
-
-/* ---------------------------------------------------------------------
-   Names of the tag ops.
-   ------------------------------------------------------------------ */
-
-Char* VG_(nameOfTagOp) ( VgTagOp h )
-{
-   switch (h) {
-      case VgT_PCast40:        return "PCast40";
-      case VgT_PCast20:        return "PCast20";
-      case VgT_PCast10:        return "PCast10";
-      case VgT_PCast01:        return "PCast01";
-      case VgT_PCast02:        return "PCast02";
-      case VgT_PCast04:        return "PCast04";
-      case VgT_PCast14:        return "PCast14";
-      case VgT_PCast12:        return "PCast12";
-      case VgT_PCast11:        return "PCast11";
-      case VgT_Left4:          return "Left4";
-      case VgT_Left2:          return "Left2";
-      case VgT_Left1:          return "Left1";
-      case VgT_SWiden14:       return "SWiden14";
-      case VgT_SWiden24:       return "SWiden24";
-      case VgT_SWiden12:       return "SWiden12";
-      case VgT_ZWiden14:       return "ZWiden14";
-      case VgT_ZWiden24:       return "ZWiden24";
-      case VgT_ZWiden12:       return "ZWiden12";
-      case VgT_UifU4:          return "UifU4";
-      case VgT_UifU2:          return "UifU2";
-      case VgT_UifU1:          return "UifU1";
-      case VgT_UifU0:          return "UifU0";
-      case VgT_DifD4:          return "DifD4";
-      case VgT_DifD2:          return "DifD2";
-      case VgT_DifD1:          return "DifD1";
-      case VgT_ImproveAND4_TQ: return "ImproveAND4_TQ";
-      case VgT_ImproveAND2_TQ: return "ImproveAND2_TQ";
-      case VgT_ImproveAND1_TQ: return "ImproveAND1_TQ";
-      case VgT_ImproveOR4_TQ:  return "ImproveOR4_TQ";
-      case VgT_ImproveOR2_TQ:  return "ImproveOR2_TQ";
-      case VgT_ImproveOR1_TQ:  return "ImproveOR1_TQ";
-      case VgT_DebugFn:        return "DebugFn";
-      default: VG_(panic)("vg_nameOfTagOp");
-   }
-}
-
-
-/* ---------------------------------------------------------------------
-   Debugging stuff.
-   ------------------------------------------------------------------ */
-
-/* Implementation for checking tag values. */
-
-UInt VG_(DebugFn) ( UInt a1, UInt a2 )
-{
-   vg_assert(2+2 == 5);
-   return 0;
-}
-
-
-/*--------------------------------------------------------------------*/
-/*--- end                                             vg_vtagops.c ---*/
-/*--------------------------------------------------------------------*/