This jumbo-checkin is the Full Virtualization checkin.  This eliminates
Valgrind's dependency on the dynamic linker for getting started, and
instead takes things into its own hands.

This checkin doesn't add much in the way of new functionality, but it
is the basis for all future work on Valgrind.  It allows us much more
flexibility in implementation, as well as increasing the reliability
of Valgrind by protecting it more from its clients.

This patch requires some changes to tools to update them to the changes
in the tool API, but they are straightforward.  See the posting "Heads
up: Full Virtualization" on valgrind-developers for a more complete
description of this change and its effects on you.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2118 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/Makefile.am b/Makefile.am
index 86308e6..74e87b4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,10 +1,9 @@
 
 AUTOMAKE_OPTIONS = 1.6 dist-bzip2
 
-## coregrind must come before memcheck, addrcheck, helgrind, for
-##   vg_replace_malloc.o.
+## include must be first for vg_skin.h
 ## addrcheck must come after memcheck, for mac_*.o
-SUBDIRS = 	coregrind . docs tests include auxprogs \
+SUBDIRS = 	include coregrind . docs tests auxprogs \
 		memcheck \
 		addrcheck \
 		cachegrind \
@@ -50,3 +49,8 @@
 	rm -f $(DESTDIR)$(valdir)/libpthread.so.0
 	$(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0
 
+all-local:
+	mkdir -p .in_place
+	rm -f $(addprefix .in_place/,default.supp $(SUPP_FILES))
+	ln -s $(addprefix ../,default.supp $(SUPP_FILES)) $(top_srcdir)/.in_place
+
diff --git a/addrcheck/Makefile.am b/addrcheck/Makefile.am
index a3731e7..b2e5394 100644
--- a/addrcheck/Makefile.am
+++ b/addrcheck/Makefile.am
@@ -8,8 +8,9 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
-val_PROGRAMS = vgskin_addrcheck.so
+val_PROGRAMS = vgskin_addrcheck.so vgpreload_addrcheck.so
 
 vgskin_addrcheck_so_SOURCES = ac_main.c
 vgskin_addrcheck_so_LDFLAGS = -shared
@@ -17,6 +18,14 @@
 	../memcheck/mac_leakcheck.o \
 	../memcheck/mac_malloc_wrappers.o \
 	../memcheck/mac_needs.o \
-	../memcheck/mac_replace_strmem.o \
-	../coregrind/vg_replace_malloc.o
+	../memcheck/mac_replace_strmem.o
 
+vgpreload_addrcheck_so_SOURCES = 
+vgpreload_addrcheck_so_LDADD = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_addrcheck_so_DEPENDENCIES = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_addrcheck_so_LDFLAGS = -shared -Wl,-z,interpose,-z,initfirst
+
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(addprefix $(inplacedir)/,$(val_PROGRAMS))
+	ln -f -s $(addprefix $(top_srcdir)/$(subdir)/,$(val_PROGRAMS)) $(inplacedir)
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
index e47fd0a..9f13a9d 100644
--- a/addrcheck/ac_main.c
+++ b/addrcheck/ac_main.c
@@ -35,8 +35,6 @@
 //#include "vg_profile.c"
 
 
-VG_DETERMINE_INTERFACE_VERSION
-
 /*------------------------------------------------------------*/
 /*--- Comparing and printing errors                        ---*/
 /*------------------------------------------------------------*/
@@ -191,7 +189,7 @@
       although this isn't important, so the following assert is
       spurious. */
    sk_assert(0 == (sizeof(AcSecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(AcSecMap), caller );
+   map = (AcSecMap *)VG_(shadow_alloc)(sizeof(AcSecMap));
 
    for (i = 0; i < 8192; i++)
       map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
@@ -1288,6 +1286,7 @@
    VG_(needs_client_requests)     ();
    VG_(needs_syscall_wrapper)     ();
    VG_(needs_sanity_checks)       ();
+   VG_(needs_shadow_memory)       ();
 
    MAC_( new_mem_heap)             = & ac_new_mem_heap;
    MAC_( ban_mem_heap)             = & ac_make_noaccess;
@@ -1295,38 +1294,38 @@
    MAC_( die_mem_heap)             = & ac_make_noaccess;
    MAC_(check_noaccess)            = & ac_check_noaccess;
 
-   VG_(track_new_mem_startup)      ( & ac_new_mem_startup );
-   VG_(track_new_mem_stack_signal) ( & ac_make_accessible );
-   VG_(track_new_mem_brk)          ( & ac_make_accessible );
-   VG_(track_new_mem_mmap)         ( & ac_set_perms );
+   VG_(init_new_mem_startup)      ( & ac_new_mem_startup );
+   VG_(init_new_mem_stack_signal) ( & ac_make_accessible );
+   VG_(init_new_mem_brk)          ( & ac_make_accessible );
+   VG_(init_new_mem_mmap)         ( & ac_set_perms );
    
-   VG_(track_copy_mem_remap)       ( & ac_copy_address_range_state );
-   VG_(track_change_mem_mprotect)  ( & ac_set_perms );
+   VG_(init_copy_mem_remap)       ( & ac_copy_address_range_state );
+   VG_(init_change_mem_mprotect)  ( & ac_set_perms );
       
-   VG_(track_die_mem_stack_signal) ( & ac_make_noaccess ); 
-   VG_(track_die_mem_brk)          ( & ac_make_noaccess );
-   VG_(track_die_mem_munmap)       ( & ac_make_noaccess ); 
+   VG_(init_die_mem_stack_signal) ( & ac_make_noaccess ); 
+   VG_(init_die_mem_brk)          ( & ac_make_noaccess );
+   VG_(init_die_mem_munmap)       ( & ac_make_noaccess ); 
 
-   VG_(track_new_mem_stack_4)      ( & MAC_(new_mem_stack_4)  );
-   VG_(track_new_mem_stack_8)      ( & MAC_(new_mem_stack_8)  );
-   VG_(track_new_mem_stack_12)     ( & MAC_(new_mem_stack_12) );
-   VG_(track_new_mem_stack_16)     ( & MAC_(new_mem_stack_16) );
-   VG_(track_new_mem_stack_32)     ( & MAC_(new_mem_stack_32) );
-   VG_(track_new_mem_stack)        ( & MAC_(new_mem_stack)    );
+   VG_(init_new_mem_stack_4)      ( & MAC_(new_mem_stack_4)  );
+   VG_(init_new_mem_stack_8)      ( & MAC_(new_mem_stack_8)  );
+   VG_(init_new_mem_stack_12)     ( & MAC_(new_mem_stack_12) );
+   VG_(init_new_mem_stack_16)     ( & MAC_(new_mem_stack_16) );
+   VG_(init_new_mem_stack_32)     ( & MAC_(new_mem_stack_32) );
+   VG_(init_new_mem_stack)        ( & MAC_(new_mem_stack)    );
 
-   VG_(track_die_mem_stack_4)      ( & MAC_(die_mem_stack_4)  );
-   VG_(track_die_mem_stack_8)      ( & MAC_(die_mem_stack_8)  );
-   VG_(track_die_mem_stack_12)     ( & MAC_(die_mem_stack_12) );
-   VG_(track_die_mem_stack_16)     ( & MAC_(die_mem_stack_16) );
-   VG_(track_die_mem_stack_32)     ( & MAC_(die_mem_stack_32) );
-   VG_(track_die_mem_stack)        ( & MAC_(die_mem_stack)    );
+   VG_(init_die_mem_stack_4)      ( & MAC_(die_mem_stack_4)  );
+   VG_(init_die_mem_stack_8)      ( & MAC_(die_mem_stack_8)  );
+   VG_(init_die_mem_stack_12)     ( & MAC_(die_mem_stack_12) );
+   VG_(init_die_mem_stack_16)     ( & MAC_(die_mem_stack_16) );
+   VG_(init_die_mem_stack_32)     ( & MAC_(die_mem_stack_32) );
+   VG_(init_die_mem_stack)        ( & MAC_(die_mem_stack)    );
    
-   VG_(track_ban_mem_stack)        ( & ac_make_noaccess );
+   VG_(init_ban_mem_stack)        ( & ac_make_noaccess );
 
-   VG_(track_pre_mem_read)         ( & ac_check_is_readable );
-   VG_(track_pre_mem_read_asciiz)  ( & ac_check_is_readable_asciiz );
-   VG_(track_pre_mem_write)        ( & ac_check_is_writable );
-   VG_(track_post_mem_write)       ( & ac_make_accessible );
+   VG_(init_pre_mem_read)         ( & ac_check_is_readable );
+   VG_(init_pre_mem_read_asciiz)  ( & ac_check_is_readable_asciiz );
+   VG_(init_pre_mem_write)        ( & ac_check_is_writable );
+   VG_(init_post_mem_write)       ( & ac_make_accessible );
 
    VG_(register_compact_helper)((Addr) & ac_helperc_LOAD4);
    VG_(register_compact_helper)((Addr) & ac_helperc_LOAD2);
@@ -1354,6 +1353,9 @@
    MAC_(common_fini)( ac_detect_memory_leaks );
 }
 
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 1./8)
+
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                ac_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/autogen.sh b/autogen.sh
index 117462c..6856d10 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -11,7 +11,7 @@
     fi
 }
 
-run aclocal
+run aclocal-1.6
 run autoheader
-run automake -a
+run automake-1.6 -a
 run autoconf
diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am
index 7786699..aea0610 100644
--- a/cachegrind/Makefile.am
+++ b/cachegrind/Makefile.am
@@ -6,6 +6,7 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
 bin_SCRIPTS = cg_annotate
 
@@ -19,3 +20,8 @@
 
 vgskin_cachegrind_so_SOURCES = cg_main.c
 vgskin_cachegrind_so_LDFLAGS = -shared
+
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(inplacedir)/$(val_PROGRAMS)
+	ln -f -s $(top_srcdir)/$(subdir)/$(val_PROGRAMS) $(inplacedir)/$(val_PROGRAMS)
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index e15b411..a04f13b 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -33,8 +33,6 @@
 #include "vg_skin.h"
 //#include "vg_profile.c"
 
-VG_DETERMINE_INTERFACE_VERSION
-
 /* For cache simulation */
 typedef struct {
     int size;       /* bytes */ 
@@ -2072,6 +2070,8 @@
    init_BBCC_table();
 }
 
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
+
 #if 0
 Bool SK_(cheap_sanity_check)(void) { return True; }
 
diff --git a/configure.in b/configure.in
index 7916706..018ec00 100644
--- a/configure.in
+++ b/configure.in
@@ -350,7 +350,7 @@
    coregrind/Makefile 
    coregrind/demangle/Makefile 
    coregrind/docs/Makefile
-   coregrind/valgrind
+   coregrind/x86/Makefile
    addrcheck/Makefile
    addrcheck/tests/Makefile
    addrcheck/docs/Makefile
diff --git a/corecheck/Makefile.am b/corecheck/Makefile.am
index c9fd968..56b2494 100644
--- a/corecheck/Makefile.am
+++ b/corecheck/Makefile.am
@@ -6,8 +6,14 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
 val_PROGRAMS = vgskin_corecheck.so
 
 vgskin_corecheck_so_SOURCES = cc_main.c
 vgskin_corecheck_so_LDFLAGS = -shared
+
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(inplacedir)/$(val_PROGRAMS)
+	ln -f -s $(top_srcdir)/$(subdir)/$(val_PROGRAMS) $(inplacedir)/$(val_PROGRAMS)
diff --git a/corecheck/cc_main.c b/corecheck/cc_main.c
index 13634c1..e533c96 100644
--- a/corecheck/cc_main.c
+++ b/corecheck/cc_main.c
@@ -31,8 +31,6 @@
 
 #include "vg_skin.h"
 
-VG_DETERMINE_INTERFACE_VERSION
-
 void SK_(pre_clo_init)(void)
 {
    VG_(details_name)            ("Coregrind");
@@ -47,6 +45,8 @@
    /* No core events to track */
 }
 
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
+
 void SK_(post_clo_init)(void)
 {
 }
diff --git a/corecheck/tests/Makefile.am b/corecheck/tests/Makefile.am
index c61486f..c906df4 100644
--- a/corecheck/tests/Makefile.am
+++ b/corecheck/tests/Makefile.am
@@ -35,13 +35,19 @@
 	fdleak_socketpair sigkill res_search \
 	pth_atfork1 pth_cancel2 pth_cvsimple pth_empty \
 	pth_exit pth_mutexspeed pth_once \
+	as_mmap as_shm \
 	vgprintf
 
 AM_CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g -O0 -I$(top_srcdir)/include
 AM_CXXFLAGS = $(AM_CFLAGS)
 
-# C ones
-erringfds_SOURCES 	= erringfds.c
+vgprintf_SOURCES	= vgprintf.c
+
+# Client address space checks
+as_mmap_SOURCES		= as_mmap.c
+as_shm_SOURCES		= as_shm.c
+
+# Leak tests
 fdleak_cmsg_SOURCES	= fdleak_cmsg.c
 fdleak_creat_SOURCES	= fdleak_creat.c
 fdleak_dup_SOURCES	= fdleak_dup.c
@@ -52,7 +58,6 @@
 fdleak_pipe_SOURCES	= fdleak_pipe.c
 fdleak_socketpair_SOURCES = fdleak_socketpair.c
 sigkill_SOURCES		= sigkill.c
-vgprintf_SOURCES	= vgprintf.c
 
 # Pthread ones
 pth_atfork1_SOURCES	= pth_atfork1.c
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 4f5a6fc..4105775 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -1,40 +1,49 @@
 
-SUBDIRS = demangle . docs
+SUBDIRS = demangle . docs x86
 
-add_includes = -I$(srcdir)/demangle -I$(top_srcdir)/include
-
-AM_CPPFLAGS = $(add_includes) -DVG_LIBDIR="\"$(libdir)"\"
-AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fno-omit-frame-pointer \
-		@PREFERRED_STACK_BOUNDARY@ -g -fpic
-AM_CCASFLAGS = $(add_includes) -I..
+add_includes = -I$(srcdir)/demangle -I$(top_srcdir)/include -I$(srcdir)/x86
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
-bin_SCRIPTS = valgrind
+AM_CPPFLAGS = $(add_includes) -DVG_LIBDIR="\"$(valdir)"\"
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fno-omit-frame-pointer \
+		@PREFERRED_STACK_BOUNDARY@ -g -DELFSZ=32
+AM_CCASFLAGS = $(add_includes) -I..
 
 default.supp: $(SUPP_FILES)
 
+bin_PROGRAMS = \
+	valgrind
+
 val_PROGRAMS = \
-	valgrind.so \
-	valgrinq.so \
-	libpthread.so
+	valgrind stage2 \
+	libpthread.so \
+	vg_inject.so
 
-EXTRA_DIST = vg_libpthread.vs valgrind.vs
+EXTRA_DIST = \
+	vg_libpthread.vs valgrind.vs \
+	gen_toolint.pl toolfuncs.def \
+	vg_replace_malloc.c
 
-libpthread_so_SOURCES = \
-	vg_libpthread.c \
-	vg_libpthread_unimp.c
-libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
-libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR \
-	-shared -fpic \
-	-Wl,-version-script $(srcdir)/vg_libpthread.vs \
-	-Wl,-z,nodelete,-z,initfirst \
-	-Wl,--soname=libpthread.so.0
+BUILT_SOURCES = vg_toolint.c vg_toolint.h
+CLEANFILES = vg_toolint.c vg_toolint.h
 
-valgrinq_so_SOURCES = vg_valgrinq_dummy.c
-valgrinq_so_LDFLAGS = -shared
+valgrind_SOURCES = \
+	stage1.c \
+	ume.c \
+	x86/ume_entry.S \
+	x86/ume_go.c 
+valgrind_DEPENDENCIES =
+valgrind_LDFLAGS=-static -g -Wl,-e,_ume_entry
+valgrind_LDADD=
 
-valgrind_so_SOURCES = \
+stage2_SOURCES = \
+	stage2.c \
+	ume.c \
+	x86/ume_entry.S \
+	x86/ume_go.c \
+	\
 	vg_scheduler.c \
 	vg_default.c \
 	vg_demangle.c \
@@ -45,7 +54,6 @@
 	vg_hashtable.c \
 	vg_helpers.S \
 	vg_instrument.c \
-	vg_intercept.c \
 	vg_main.c \
 	vg_malloc2.c \
 	vg_memory.c \
@@ -60,47 +68,83 @@
 	vg_symtab2.c \
 	vg_dwarf.c \
 	vg_stabs.c \
+	vg_skiplist.c \
 	vg_symtypes.c \
 	vg_syscalls.c \
 	vg_syscall.S \
 	vg_to_ucode.c \
+	vg_toolint.c \
 	vg_translate.c \
 	vg_transtab.c \
 	vg_ldt.c
-valgrind_so_DEPENDENCIES = $(srcdir)/valgrind.vs
-valgrind_so_LDFLAGS = \
-	-Wl,-z,initfirst -shared \
+stage2_DEPENDENCIES = $(srcdir)/valgrind.vs $(srcdir)/x86/stage2.lds
+stage2_LDFLAGS=-Wl,--export-dynamic -Wl,-e,_ume_entry  -g \
+	-Wl,-defsym,kickstart_base=0xb8000000 \
+	-Wl,-T,$(srcdir)/x86/stage2.lds \
 	-Wl,-version-script $(srcdir)/valgrind.vs 
-
-valgrind_so_LDADD = \
+stage2_LDADD= \
 	demangle/cp-demangle.o \
 	demangle/cplus-dem.o \
 	demangle/dyn-string.o \
-	demangle/safe-ctype.o
+	demangle/safe-ctype.o \
+	-ldl
 
-## Build a .a library, but we don't actually use it;  just a ploy to ensure
-## vg_replace_malloc.o is built.
-noinst_LIBRARIES = lib_replace_malloc.a
+vg_toolint.c: $(srcdir)/gen_toolint.pl $(srcdir)/toolfuncs.def $(srcdir)/Makefile
+	rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl callwrap     < $(srcdir)/toolfuncs.def >  $@ || rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl missingfuncs < $(srcdir)/toolfuncs.def >> $@ || rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl initfunc     < $(srcdir)/toolfuncs.def >> $@ || rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl initdlsym    < $(srcdir)/toolfuncs.def >> $@ || rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl structdef    < $(srcdir)/toolfuncs.def >> $@ || rm -f $@
 
-lib_replace_malloc_a_SOURCES = vg_replace_malloc.c
+vg_toolint.h:  $(srcdir)/gen_toolint.pl $(srcdir)/toolfuncs.def $(srcdir)/Makefile
+	rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl proto  < $(srcdir)/toolfuncs.def >  $@ || rm -f $@
+	$(PERL) $(srcdir)/gen_toolint.pl struct < $(srcdir)/toolfuncs.def >> $@ || rm -f $@
+
+libpthread_so_SOURCES = \
+	vg_libpthread.c \
+	vg_libpthread_unimp.c \
+	vg_syscall.S
+libpthread_so_DEPENDENCIES = $(srcdir)/vg_libpthread.vs
+libpthread_so_LDFLAGS	   = -Werror -fno-omit-frame-pointer -UVG_LIBDIR \
+	-shared -fpic \
+	-Wl,-version-script $(srcdir)/vg_libpthread.vs \
+	-Wl,-z,nodelete \
+	-Wl,--soname=libpthread.so.0
+
+vg_inject_so_SOURCES = \
+	vg_intercept.c 
+
+# Not really true, but we need to build vg_replace_malloc.o somehow
+vg_inject_so_DEPENDENCIES = \
+	vg_replace_malloc.o
+
+vg_inject_so_LDFLAGS = \
+	-shared \
+	-Wl,--soname,vg_inject.so \
+	-Wl,-z,initfirst
 
 noinst_HEADERS = \
+	ume.h			\
+	ume_arch.h		\
         vg_include.h            \
         vg_constants.h          \
 	vg_symtab2.h		\
         vg_unistd.h             \
 	vg_symtypes.h		\
+	vg_toolint.h		\
 	vg_unsafe.h
 
-MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) .in_place/libpthread.so.0
+MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) $(inplacedir)/libpthread.so.0
 
-vg_intercept.o vg_libpthread.o vg_replace_malloc.o: CFLAGS += -fno-omit-frame-pointer
+vg_replace_malloc.o vg_intercept.o vg_libpthread.o: CFLAGS += -fno-omit-frame-pointer -g -fpic
 
 all-local:
-	mkdir -p .in_place
-	-rm -f .in_place/libpthread.so.0
-	-rm -f .in_place/valgrind.so
-	-rm -f .in_place/valgrinq.so
-	ln -f -s ../libpthread.so .in_place/libpthread.so.0
-	ln -f -s ../valgrind.so .in_place/valgrind.so
-	ln -f -s ../valgrinq.so .in_place/valgrinq.so
+	mkdir -p $(inplacedir)
+	for i in $(val_PROGRAMS); do \
+		to=$(inplacedir)/$$(echo $$i | sed 's,libpthread.so,libpthread.so.0,'); \
+		rm -f $$$to; \
+		ln -sf $(top_srcdir)/$(subdir)/$$i $$to; \
+	done
+
diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c
index 8528496..c13063f 100644
--- a/coregrind/arch/x86-linux/vg_libpthread.c
+++ b/coregrind/arch/x86-linux/vg_libpthread.c
@@ -68,6 +68,7 @@
 #include <stdio.h>
 #include <errno.h>
 
+#include <stdlib.h>
 
 # define strong_alias(name, aliasname) \
   extern __typeof (name) aliasname __attribute__ ((alias (#name)));
@@ -128,40 +129,19 @@
    return res;
 }
 
-static
-void my_exit ( int arg )
-{
-   VG_(do_syscall)(__NR_exit, arg);
-   /*NOTREACHED*/
-}
-
-/* Apparently unused. 
-static
-void my_write ( int fd, const void *buf, int count )
-{
-   VG_(do_syscall)(__NR_write, fd, (int)buf, count );
-}
-*/
-
-/* We need this guy -- it's in valgrind.so. */
-extern void VG_(startup) ( void );
-
-
-/* Just start up Valgrind if it's not already going.  VG_(startup)()
-   detects and ignores second and subsequent calls. */
+/* Don't do anything if we're not under Valgrind */
 static __inline__
 void ensure_valgrind ( char* caller )
 {
-   VG_(startup)();
+   if (!RUNNING_ON_VALGRIND) {
+      const char msg[] = "Warning: this libpthread.so should only be run with Valgrind\n";
+      VG_(do_syscall)(__NR_write, 2, msg, sizeof(msg)-1);
+      VG_(do_syscall)(__NR_exit, 1);
+   }
 }
 
 /* While we're at it ... hook our own startup function into this
    game. */
-__asm__ (
-   ".section .init\n"
-   "\tcall vgPlain_startup"
-);
-
 
 static
 __attribute__((noreturn))
@@ -173,8 +153,8 @@
    strcat(buf, "\nPlease report this bug at: ");
    strcat(buf, VG_BUGS_TO);
    strcat(buf, "\n\n");
-   VALGRIND_NON_SIMD_CALL2(VG_(message), Vg_UserMsg, buf);
-   my_exit(1);
+   VALGRIND_INTERNAL_PRINTF(buf);
+   _exit(1);
    /* We have to persuade gcc into believing this doesn't return. */
    while (1) { };
 }
@@ -186,7 +166,7 @@
    if (get_pt_trace_level() >= 0) {
       snprintf(buf, sizeof(buf), "%s%s%s", s1, s2, s3);
       buf[sizeof(buf)-1] = '\0';
-      VALGRIND_NON_SIMD_CALL2(VG_(message), Vg_UserMsg, buf);
+      VALGRIND_INTERNAL_PRINTF(buf);
    }
 }
 
@@ -223,13 +203,14 @@
    char buf[1000];
    static Bool entered = False;
    if (entered) 
-      my_exit(2);
+      _exit(2);
    entered = True;
    sprintf(buf, "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
                 "valgrind", file, line, fn, expr );
    cat_n_send ( "", buf, "" );
    sprintf(buf, "Please report this bug at: %s\n\n", VG_BUGS_TO);
-   my_exit(1);
+   cat_n_send ( "", buf, "" );
+   _exit(1);
 }
 
 #define MY__STRING(__str)  #__str
@@ -243,10 +224,14 @@
 static
 void my_free ( void* ptr )
 {
+#if 0
    int res;
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__FREE, ptr, 0, 0, 0);
    my_assert(res == 0);
+#else
+   free(ptr);
+#endif
 }
 
 
@@ -254,8 +239,12 @@
 void* my_malloc ( int nbytes )
 {
    void* res;
+#if 0
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__MALLOC, nbytes, 0, 0, 0);
+#else
+   res = malloc(nbytes);
+#endif
    my_assert(res != (void*)0);
    return res;
 }
@@ -1033,7 +1022,7 @@
 }
 
 int pthread_cond_init( pthread_cond_t *cond,
-                       const pthread_condattr_t *cond_attr)
+		       const pthread_condattr_t *cond_attr)
 {
    cond->__c_waiting = (_pthread_descr)VG_INVALID_THREADID;
    return 0;
@@ -1278,14 +1267,12 @@
 int sigwait ( const sigset_t* set, int* sig )
 {
    int res;
-   vki_ksiginfo_t si;
+   siginfo_t si;
    
    __my_pthread_testcancel();
 
-   /* As with pthread_sigmask we deliberately confuse sigset_t with
-      vki_ksigset_t. */
    si.si_signo = 0;
-   res = VG_(ksigtimedwait)((const vki_ksigset_t *)set, &si, NULL);
+   res = sigtimedwait(set, &si, NULL);
    *sig = si.si_signo;
 
    return 0;			/* always returns 0 */
@@ -1642,7 +1629,9 @@
 int* __errno_location ( void )
 {
    int tid;
-   /* ensure_valgrind("__errno_location"); */
+   int *ret;
+
+   ensure_valgrind("__errno_location");
    VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */,
                            VG_USERREQ__PTHREAD_GET_THREADID,
                            0, 0, 0, 0);
@@ -1650,8 +1639,11 @@
    if (tid < 1 || tid >= VG_N_THREADS)
       barf("__errno_location: invalid ThreadId");
    if (tid == 1)
-      return &errno;
-   return & thread_specific_errno[tid];
+      ret = &errno;
+   else
+      ret = &thread_specific_errno[tid];
+
+   return ret;
 }
 
 #undef h_errno
@@ -2805,17 +2797,35 @@
    ------------------------------------------------------------------ */
 int __libc_current_sigrtmin (void)
 {
-   return VG_(sig_rtmin);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__GET_SIGRT_MIN,
+			   0, 0, 0, 0);
+
+   return res;
 }
 
 int __libc_current_sigrtmax (void)
 {
-   return VG_(sig_rtmax);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__GET_SIGRT_MAX,
+			   0, 0, 0, 0);
+
+   return res;
 }
 
 int __libc_allocate_rtsig (int high)
 {
-   return VG_(sig_alloc_rtsig)(high);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__ALLOC_RTSIG,
+			   high, 0, 0, 0);
+
+   return res;
 }
 
 /* ---------------------------------------------------------------------
diff --git a/coregrind/valgrind.vs b/coregrind/valgrind.vs
index 190a75e..bffd77c 100644
--- a/coregrind/valgrind.vs
+++ b/coregrind/valgrind.vs
@@ -1,9 +1,10 @@
-VALGRIND_2.0 {
+{
 	global:
 		vgPlain_*;
 		vgSkin_*;
 		vgProf_*;
                 vgOff_*;
+
 	local:
 		*;		# default to hidden
 };
diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h
index 43f7044..7416f9d 100644
--- a/coregrind/vg_constants.h
+++ b/coregrind/vg_constants.h
@@ -65,6 +65,9 @@
 /* Offset of code in a TCEntry */
 #define VG_CODE_OFFSET		(8 + VG_MAX_JUMPS * 2)
 
+/* Client address space segment limit descriptor entry */
+#define VG_POINTERCHECK_SEGIDX	1
+
 /* Debugging hack for assembly code ... sigh. */
 #if 0
 #define OYNK(nnn) pushal;  pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal
@@ -90,6 +93,29 @@
 /* Assembly code stubs make this request */
 #define VG_USERREQ__SIGNAL_RETURNS          0x4001
 
+/* Various environment variables we pay attention to */
+
+/* The directory we look for all our auxillary files in */
+#define VALGRINDLIB	"VALGRINDLIB"
+
+/* Additional command-line arguments; they are overridden by actual
+   command-line option.  Each argument is separated by spaces.  There
+   is no quoting mechanism.
+ */
+#define VALGRINDOPTS	"VALGRIND_OPTS"
+
+/* If this variable is present in the environment, then valgrind will
+   not parse the command line for options at all; all options come
+   from this variable.  Arguments are terminated by ^A (\001).  There
+   is no quoting mechanism.
+
+   This variable is not expected to be set by anything other than
+   Valgrind itself, as part of its handling of execve with
+   --trace-children=yes.  This variable should not be present in the
+   client environment.
+ */
+#define VALGRINDCLO	"_VALGRIND_CLO"
+
 #endif /* ndef __VG_CONSTANTS_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_default.c b/coregrind/vg_default.c
index b15aef7..30807d6 100644
--- a/coregrind/vg_default.c
+++ b/coregrind/vg_default.c
@@ -44,8 +44,8 @@
 /* If the tool fails to define one or more of the required functions,
  * make it very clear what went wrong! */
 
-static __attribute__ ((noreturn))
-void fund_panic ( const Char* fn )
+__attribute__ ((noreturn))
+void VG_(missing_tool_func) ( const Char* fn )
 {
    VG_(printf)(
       "\nTool error:\n"
@@ -56,17 +56,6 @@
 }
 
 static __attribute__ ((noreturn))
-void non_fund_panic ( const Char* fn )
-{
-   VG_(printf)(
-      "\nTool error:\n"
-      "  The tool you have selected is missing the function `%s'\n"
-      "  required by one of its needs.\n\n",
-      fn);
-   VG_(skin_panic)("Missing tool function");
-}
-
-static __attribute__ ((noreturn))
 void malloc_panic ( const Char* fn )
 {
    VG_(printf)(
@@ -77,20 +66,6 @@
    VG_(skin_panic)("Missing tool function");
 }
 
-#define FUND(proto)                       \
-__attribute__((weak))                     \
-proto                                     \
-{                                         \
-   fund_panic(__PRETTY_FUNCTION__);       \
-}
-
-#define NON_FUND(proto)                   \
-__attribute__((weak))                     \
-proto                                     \
-{                                         \
-   non_fund_panic(__PRETTY_FUNCTION__);   \
-}
-
 #define MALLOC(proto)                     \
 __attribute__((weak))                     \
 proto                                     \
@@ -102,57 +77,6 @@
    Default functions
    ------------------------------------------------------------------ */
 
-/* Fundamental template functions */
-FUND( void        SK_(pre_clo_init) (void) );
-FUND( void        SK_(post_clo_init)(void) );
-FUND( UCodeBlock* SK_(instrument)   (UCodeBlock* cb, Addr not_used) );
-FUND( void        SK_(fini)         (Int exitcode) );
-
-/* For error reporting and suppression handling */
-NON_FUND( Bool  SK_(eq_SkinError)(VgRes res, Error* e1, Error* e2) );
-NON_FUND( void  SK_(pp_SkinError)(Error* err) );
-NON_FUND( UInt  SK_(update_extra)(Error* err) );
-NON_FUND( Bool  SK_(recognised_suppression)(Char* name, Supp* su) );
-NON_FUND( Bool  SK_(read_extra_suppression_info)(Int fd, Char* buf, Int nBuf,
-                                                 Supp* su) );
-NON_FUND( Bool  SK_(error_matches_suppression)(Error* err, Supp* su) );
-NON_FUND( Char* SK_(get_error_name)(Error* err) );
-NON_FUND( void  SK_(print_extra_suppression_info)(Error* err) );
-
-/* For throwing out basic block level info when code is invalidated */
-NON_FUND( void SK_(discard_basic_block_info)(Addr a, UInt size) );
-
-/* For throwing out basic block level info when code is invalidated */
-NON_FUND( void SK_(written_shadow_regs_values)(UInt* gen_reg, UInt* eflags) );
-
-/* Command line arg handling functions */
-NON_FUND( Bool SK_(process_cmd_line_option)(Char* argv) );
-NON_FUND( void SK_(print_usage)(void) );
-NON_FUND( void SK_(print_debug_usage)(void) );
-
-/* Client request template function */
-NON_FUND( Bool SK_(handle_client_request)(ThreadId tid, UInt* arg_block,
-                                          UInt *ret) );
-
-/* UCode extension */
-NON_FUND( void  SK_(emit_XUInstr)  (UInstr* u, RRegSet regs_live_before) );
-NON_FUND( Bool  SK_(sane_XUInstr)  (Bool beforeRA, Bool beforeLiveness, 
-                                    UInstr* u) );
-NON_FUND( Char* SK_(name_XUOpcode) (Opcode opc) );
-NON_FUND( void  SK_(pp_XUInstr)    (UInstr* u) );
-NON_FUND( Int   SK_(get_Xreg_usage)(UInstr* u, Tag tag, Int* regs,
-                                    Bool* isWrites) );               
-
-/* Syscall wrapping */
-NON_FUND( void* SK_(pre_syscall) (ThreadId tid, UInt syscallno,
-                                  Bool is_blocking) );
-NON_FUND( void  SK_(post_syscall)(ThreadId tid, UInt syscallno,
-                                 void* pre_result, Int res, Bool is_blocking) );
-
-/* Sanity checks */
-NON_FUND( Bool SK_(cheap_sanity_check)(void) );
-NON_FUND( Bool SK_(expensive_sanity_check)(void) );
-
 /*------------------------------------------------------------*/
 /*--- Replacing malloc et al                               ---*/
 /*------------------------------------------------------------*/
@@ -188,15 +112,6 @@
       malloc_panic(__PRETTY_FUNCTION__);
 }
 
-MALLOC( void* SK_(__builtin_new)    ( Int size ) );
-MALLOC( void* SK_(__builtin_vec_new)( Int size ) );
-MALLOC( void* SK_(memalign)         ( Int align, Int size ) );
-MALLOC( void* SK_(calloc)           ( Int nmemb, Int size ) );
-
-MALLOC( void  SK_(__builtin_delete)     ( void* p ) );
-MALLOC( void  SK_(__builtin_vec_delete) ( void* p ) );
-MALLOC( void* SK_(realloc)              ( void* p, Int new_size ) );
-
 /*--------------------------------------------------------------------*/
 /*--- end                                            vg_defaults.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S
index fe6a7bc..f8bfaf9 100644
--- a/coregrind/vg_dispatch.S
+++ b/coregrind/vg_dispatch.S
@@ -76,6 +76,16 @@
 	pushl	%edi
 	pushl	%ebp
 
+	/* check to see if we're doing pointer checking */
+	movl	VG_(clo_pointercheck), %eax
+	testl	%eax,%eax
+	jz	1f
+	
+	pushl	%fs						/* save %fs     */
+	mov	$(VG_POINTERCHECK_SEGIDX << 3) + 7, %eax	/* load new %fs */
+	movw	%ax,%fs
+
+1:	
 	/* Set up the baseBlock pointer */
 	movl	$VG_(baseBlock), %ebp
 
@@ -137,7 +147,14 @@
 	jmp	run_innerloop_exit
 	
 run_innerloop_exit:
-	popl	%ebp
+	movl	VG_(clo_pointercheck), %ebx
+	testl	%ebx,%ebx
+	jz	1f
+
+	/* restore %fs */
+	popl	%fs
+	
+1:	popl	%ebp
 	popl	%edi
 	popl	%esi
 	popl	%edx
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index e5406c9..5153b4b 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -215,6 +215,19 @@
    VG_(emitB) ( (l >> 24) & 0x000000FF );
 }
 
+/* This bit is ORd onto the size to indicate that it's a client
+   pointer which needs bounds checking. */
+#define DO_BOUNDSCHECK	(1<<8)
+
+/* If the user asks for it, generate bounds checks on application
+   pointer dereferences, in the form of a segment override. */
+static __inline__ void boundscheck()
+{
+   if (VG_(clo_pointercheck))
+      VG_(emitB)(0x64);		/* %fs prefix - see vg_dispatch.S */
+}
+
+
 static void emit_get_eflags ( void )
 {
    Int off = 4 * VGOFF_(m_eflags);
@@ -771,6 +784,12 @@
 void VG_(emit_movv_reg_offregmem) ( Int sz, Int reg, Int off, Int areg )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (sz & DO_BOUNDSCHECK) {
+      boundscheck();
+      sz &= ~DO_BOUNDSCHECK;
+   }
+
    if (sz == 2) VG_(emitB) ( 0x66 );
    VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
    VG_(emit_amode_offregmem_reg) ( off, areg, reg );
@@ -782,6 +801,12 @@
 static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (sz & DO_BOUNDSCHECK) {
+      boundscheck();
+      sz &= ~DO_BOUNDSCHECK;
+   }
+
    if (sz == 2) VG_(emitB) ( 0x66 );
    VG_(emitB) ( 0x8B ); /* MOV Ev, Gv */
    emit_amode_regmem_reg ( reg1, reg2 );
@@ -793,6 +818,12 @@
 static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (sz & DO_BOUNDSCHECK) {
+      boundscheck();
+      sz &= ~DO_BOUNDSCHECK;
+   }
+
    if (sz == 2) VG_(emitB) ( 0x66 );
    VG_(emitB) ( 0x89 ); /* MOV Gv, Ev */
    emit_amode_regmem_reg ( reg2, reg1 );
@@ -1205,9 +1236,13 @@
                    nameIReg(1,reg1), nameIReg(1,reg2));
 }
 
-static void emit_movb_reg_regmem ( Int reg1, Int reg2 )
+static void emit_movb_reg_regmem ( Bool bounds, Int reg1, Int reg2 )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (bounds)
+      boundscheck();
+
    VG_(emitB) ( 0x88 ); /* MOV G1, E1 */
    emit_amode_regmem_reg ( reg2, reg1 );
    if (dis)
@@ -1290,9 +1325,11 @@
 /*--- zero-extended load emitters                  ---*/
 /*----------------------------------------------------*/
 
-void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg )
+void VG_(emit_movzbl_offregmem_reg) ( Bool bounds, Int off, Int regmem, Int reg )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+   if (bounds)
+      boundscheck();
    VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
    VG_(emit_amode_offregmem_reg) ( off, regmem, reg );
    if (dis)
@@ -1300,9 +1337,13 @@
                    off, nameIReg(4,regmem), nameIReg(4,reg));
 }
 
-static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 )
+static void emit_movzbl_regmem_reg ( Bool bounds, Int reg1, Int reg2 )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (bounds)
+      boundscheck();
+
    VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB6 ); /* MOVZBL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
@@ -1310,9 +1351,13 @@
                                                nameIReg(4,reg2));
 }
 
-void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg )
+void VG_(emit_movzwl_offregmem_reg) ( Bool bounds, Int off, Int areg, Int reg )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (bounds)
+      boundscheck();
+
    VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
    VG_(emit_amode_offregmem_reg) ( off, areg, reg );
    if (dis)
@@ -1320,9 +1365,13 @@
                    off, nameIReg(4,areg), nameIReg(4,reg));
 }
 
-void VG_( emit_movzwl_regmem_reg ) ( Int reg1, Int reg2 )
+void VG_( emit_movzwl_regmem_reg ) ( Bool bounds, Int reg1, Int reg2 )
 {
    VG_(new_emit)(False, FlagsEmpty, FlagsEmpty);
+
+   if (bounds)
+      boundscheck();
+
    VG_(emitB) ( 0x0F ); VG_(emitB) ( 0xB7 ); /* MOVZWL */
    emit_amode_regmem_reg ( reg1, reg2 );
    if (dis)
@@ -1394,6 +1443,9 @@
                               Int reg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();		/* assume all FPU ops are the client's */
+
    VG_(emitB) ( first_byte );
    emit_amode_regmem_reg ( reg, second_byte_masked >> 3 );
    if (dis)
@@ -1409,6 +1461,9 @@
                                Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();
+
    VG_(emitB) ( 0x0F );
    VG_(emitB) ( first_byte );
    second_byte &= 0x38; /* mask out mod and rm fields */
@@ -1427,6 +1482,9 @@
                          Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();
+
    VG_(emitB) ( first_byte );
    VG_(emitB) ( second_byte );
    third_byte &= 0x38; /* mask out mod and rm fields */
@@ -1446,6 +1504,9 @@
                           Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();
+
    VG_(emitB) ( first_byte );
    VG_(emitB) ( second_byte );
    third_byte &= 0x38; /* mask out mod and rm fields */
@@ -1467,6 +1528,9 @@
                          Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();
+
    VG_(emitB) ( first_byte );
    VG_(emitB) ( second_byte );
    VG_(emitB) ( third_byte );
@@ -1593,6 +1657,9 @@
                           Int ireg )
 {
    VG_(new_emit)(True, uses_sflags, sets_sflags);
+
+   boundscheck();
+
    VG_(emitB) ( first_byte );
    VG_(emitB) ( second_byte );
    VG_(emitB) ( third_byte );
@@ -2067,13 +2134,13 @@
 	 dispatch loop.  We still need to keep it the same size as the
 	 call sequence. */
       VG_(emitB) ( 0xC3 );	/* ret */
-      VG_(emitB) ( 0x90 );	/* nop */
-      VG_(emitB) ( 0x90 );	/* nop */
-      VG_(emitB) ( 0x90 );	/* nop */
-      VG_(emitB) ( 0x90 );	/* nop */
+      VG_(emitB) ( 0x8d );	/* 4 byte nop (lea    0x0(%esi,1),%esi) */
+      VG_(emitB) ( 0x74 );
+      VG_(emitB) ( 0x26 );
+      VG_(emitB) ( 0x00 );
 
       if (dis)
-	 VG_(printf)("\n\t\tret; nop; nop; nop; nop\n");
+	 VG_(printf)("\n\t\tret; nop4\n");
 
       if (0 && VG_(clo_verbosity))
 	 VG_(message)(Vg_DebugMsg, "too many chained jumps in basic-block");
@@ -2087,7 +2154,7 @@
       VG_(emitB) ( 0x90 );		/* NOP */
 
       if (dis)
-	 VG_(printf)("\n\t\tud2; ud2; nop\n");
+	 VG_(printf)("\n\t\tud2; ud2; nop /* call VG_(patchme) */\n");
    }
 }   
 
@@ -2670,10 +2737,10 @@
 
 static void synth_mov_regmem_reg ( Int size, Int reg1, Int reg2 ) 
 {
-   switch (size) {
-      case 4: emit_movv_regmem_reg ( 4, reg1, reg2 ); break;
-      case 2: VG_(emit_movzwl_regmem_reg) ( reg1, reg2 ); break;
-      case 1: emit_movzbl_regmem_reg ( reg1, reg2 ); break;
+   switch (size & ~DO_BOUNDSCHECK) {
+      case 4: emit_movv_regmem_reg ( size, reg1, reg2 ); break;
+      case 2: VG_(emit_movzwl_regmem_reg) ( size & DO_BOUNDSCHECK, reg1, reg2 ); break;
+      case 1: emit_movzbl_regmem_reg ( size & DO_BOUNDSCHECK, reg1, reg2 ); break;
       default: VG_(core_panic)("synth_mov_regmem_reg");
    }  
 }
@@ -2681,10 +2748,10 @@
 
 static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) 
 {
-   switch (size) {
+   switch (size & ~DO_BOUNDSCHECK) {
       case 4: VG_(emit_movv_offregmem_reg) ( 4, off, areg, reg ); break;
-      case 2: VG_(emit_movzwl_offregmem_reg) ( off, areg, reg ); break;
-      case 1: VG_(emit_movzbl_offregmem_reg) ( off, areg, reg ); break;
+      case 2: VG_(emit_movzwl_offregmem_reg) ( size & DO_BOUNDSCHECK, off, areg, reg ); break;
+      case 1: VG_(emit_movzbl_offregmem_reg) ( size & DO_BOUNDSCHECK, off, areg, reg ); break;
       default: VG_(core_panic)("synth_mov_offregmem_reg");
    }  
 }
@@ -2713,17 +2780,17 @@
 static void synth_mov_reg_memreg ( Int size, Int reg1, Int reg2 )
 {
    Int s1;
-   switch (size) {
-      case 4: emit_movv_reg_regmem ( 4, reg1, reg2 ); break;
-      case 2: emit_movv_reg_regmem ( 2, reg1, reg2 ); break;
+   switch (size & ~DO_BOUNDSCHECK) {
+      case 4: 
+      case 2: emit_movv_reg_regmem ( size, reg1, reg2 ); break;
       case 1: if (reg1 < 4) {
-                 emit_movb_reg_regmem ( reg1, reg2 ); 
+                 emit_movb_reg_regmem ( size & DO_BOUNDSCHECK, reg1, reg2 ); 
               }
               else {
                  /* Choose a swap reg which is < 4 and not reg1 or reg2. */
                  for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ;
                  emit_swapl_reg_reg ( s1, reg1 );
-                 emit_movb_reg_regmem ( s1, reg2 );
+                 emit_movb_reg_regmem ( size & DO_BOUNDSCHECK, s1, reg2 );
                  emit_swapl_reg_reg ( s1, reg1 );
               }
               break;
@@ -3473,14 +3540,14 @@
       case STORE: {
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
-         synth_mov_reg_memreg ( u->size, u->val1, u->val2 );
+         synth_mov_reg_memreg ( u->size | DO_BOUNDSCHECK, u->val1, u->val2 );
          break;
       }
 
       case LOAD: {
          vg_assert(u->tag1 == RealReg);
          vg_assert(u->tag2 == RealReg);
-         synth_mov_regmem_reg ( u->size, u->val1, u->val2 );
+         synth_mov_regmem_reg ( u->size | DO_BOUNDSCHECK, u->val1, u->val2 );
          break;
       }
 
diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S
index 80d8662..091f8e8 100644
--- a/coregrind/vg_helpers.S
+++ b/coregrind/vg_helpers.S
@@ -1,4 +1,3 @@
-
 ##--------------------------------------------------------------------##
 ##--- Support routines for the JITter output.                      ---##
 ##---                                                 vg_helpers.S ---##
@@ -38,8 +37,13 @@
    to the request.  In both cases we use the user request mechanism.
    You need to to read the definition of VALGRIND_MAGIC_SEQUENCE
    in valgrind.h to make sense of this.
+
+   This isn't used in-place.  It is copied into the client address space
+   at an arbitary address.  Therefore, this code must be completely
+   position-independent.
 */
 .global VG_(signalreturn_bogusRA)
+.global VG_(signalreturn_bogusRA_length)
 VG_(signalreturn_bogusRA):
 	subl	$20, %esp	# allocate arg block
 	movl	%esp, %edx	# %edx == &_zzq_args[0]
@@ -57,15 +61,10 @@
 	roll $13, %eax
 	roll $19, %eax
 	# should never get here
-	pushl	$signalreturn_bogusRA_panic_msg
-	call	VG_(core_panic)
-	
-.data
-signalreturn_bogusRA_panic_msg:
-.ascii	"vg_signalreturn_bogusRA: VG_USERREQ__SIGNAL_RETURNS was missed"
-.byte	0
-.text	
-	
+	ud2
+
+VG_(signalreturn_bogusRA_length):
+	.long . - VG_(signalreturn_bogusRA)
 
 
 	
diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h
index de0ab98..1d8ab02 100644
--- a/coregrind/vg_include.h
+++ b/coregrind/vg_include.h
@@ -49,6 +49,9 @@
 #include "vg_skin.h"
 #include "valgrind.h"
 
+#undef SK_
+#define SK_(x)	vgSkinInternal_##x
+
 /* Total number of spill slots available for allocation, if a TempReg
    doesn't make it into a RealReg.  Just bomb the entire system if
    this value is too small; we don't expect it will ever get
@@ -137,6 +140,14 @@
 #define VG_STACK_SIZE_W       10000
 #define VG_SIGSTACK_SIZE_W    10000
 
+/* Useful macros */
+/* a - alignment - must be a power of 2 */
+#define ROUNDDN(p, a)	((Addr)(p) & ~((a)-1))
+#define ROUNDUP(p, a)	ROUNDDN((p)+(a)-1, (a))
+#define PGROUNDDN(p)	ROUNDDN(p, VKI_BYTES_PER_PAGE)
+#define PGROUNDUP(p)	ROUNDUP(p, VKI_BYTES_PER_PAGE)
+
+
 /* ---------------------------------------------------------------------
    Basic types
    ------------------------------------------------------------------ */
@@ -261,7 +272,9 @@
 extern Bool VG_(clo_chain_bb);
 /* Continue stack traces below main()?  Default: NO */
 extern Bool VG_(clo_show_below_main);
-
+/* Test each client pointer dereference to check it's within the
+   client address space bounds */
+extern Bool VG_(clo_pointercheck);
 
 /* ---------------------------------------------------------------------
    Debugging and profiling stuff
@@ -319,96 +332,15 @@
       Bool syscall_wrapper;
       Bool sanity_checks;
       Bool data_syms;
+      Bool shadow_memory;
    } 
    VgNeeds;
 
 extern VgNeeds VG_(needs);
 
-/* Events happening in core to track.  To be notified, assign a function
-   to the function pointer.  To ignore an event, don't do anything
-   (default assignment is to NULL in which case the call is skipped). */
-typedef
-   struct {
-      /* Memory events */
-      void (*new_mem_startup)( Addr a, UInt len, Bool rr, Bool ww, Bool xx );
-      void (*new_mem_stack_signal)  ( Addr a, UInt len );
-      void (*new_mem_brk)    ( Addr a, UInt len );
-      void (*new_mem_mmap)   ( Addr a, UInt len, Bool rr, Bool ww, Bool xx );
+extern void VG_(tool_init_dlsym)(void *dlhandle);
 
-      void (*copy_mem_remap) ( Addr from, Addr to, UInt len );
-      void (*change_mem_mprotect) ( Addr a, UInt len, Bool rr, Bool ww, Bool xx );
-      void (*die_mem_stack_signal)  ( Addr a, UInt len );
-      void (*die_mem_brk)    ( Addr a, UInt len );
-      void (*die_mem_munmap) ( Addr a, UInt len );
-
-      void (*new_mem_stack_4)  ( Addr new_ESP );
-      void (*new_mem_stack_8)  ( Addr new_ESP );
-      void (*new_mem_stack_12) ( Addr new_ESP );
-      void (*new_mem_stack_16) ( Addr new_ESP );
-      void (*new_mem_stack_32) ( Addr new_ESP );
-      void (*new_mem_stack)    ( Addr a, UInt len );
-
-      void (*die_mem_stack_4)  ( Addr die_ESP );
-      void (*die_mem_stack_8)  ( Addr die_ESP );
-      void (*die_mem_stack_12) ( Addr die_ESP );
-      void (*die_mem_stack_16) ( Addr die_ESP );
-      void (*die_mem_stack_32) ( Addr die_ESP );
-      void (*die_mem_stack)    ( Addr a, UInt len );
-
-      void (*ban_mem_stack)  ( Addr a, UInt len );
-
-      void (*pre_mem_read)   ( CorePart part, ThreadId tid,
-                               Char* s, Addr a, UInt size );
-      void (*pre_mem_read_asciiz) ( CorePart part, ThreadId tid,
-                                    Char* s, Addr a );
-      void (*pre_mem_write)  ( CorePart part, ThreadId tid,
-                               Char* s, Addr a, UInt size );
-      /* Not implemented yet -- have to add in lots of places, which is a
-         pain.  Won't bother unless/until there's a need. */
-      /* void (*post_mem_read)  ( ThreadState* tst, Char* s, 
-                                  Addr a, UInt size ); */
-      void (*post_mem_write) ( Addr a, UInt size );
-
-
-      /* Register events */
-      void (*post_regs_write_init)             ( void );
-      void (*post_reg_write_syscall_return)    ( ThreadId tid, UInt reg );
-      void (*post_reg_write_deliver_signal)    ( ThreadId tid, UInt reg );
-      void (*post_reg_write_pthread_return)    ( ThreadId tid, UInt reg );
-      void (*post_reg_write_clientreq_return)  ( ThreadId tid, UInt reg );
-      void (*post_reg_write_clientcall_return) ( ThreadId tid, UInt reg,
-                                                 Addr f );
-
-
-      /* Scheduler events (not exhaustive) */
-      void (*thread_run) ( ThreadId tid );
-
-
-      /* Thread events (not exhaustive) */
-      void (*post_thread_create) ( ThreadId tid, ThreadId child );
-      void (*post_thread_join)   ( ThreadId joiner, ThreadId joinee );
-
-
-      /* Mutex events (not exhaustive) */
-      void (*pre_mutex_lock)    ( ThreadId tid, 
-                                  void* /*pthread_mutex_t* */ mutex );
-      void (*post_mutex_lock)   ( ThreadId tid, 
-                                  void* /*pthread_mutex_t* */ mutex );
-      void (*post_mutex_unlock) ( ThreadId tid, 
-                                  void* /*pthread_mutex_t* */ mutex );
-
-      /* Signal events (not exhaustive) */
-      void (* pre_deliver_signal) ( ThreadId tid, Int sigNo, Bool alt_stack );
-      void (*post_deliver_signal) ( ThreadId tid, Int sigNo );
-
-      
-      /* Others... condition variable... */
-      /* ... */
-   }
-   VgTrackEvents;
-
-extern VgTrackEvents VG_(track_events);
-
+#include "vg_toolint.h"
 
 /* ---------------------------------------------------------------------
    Exports of vg_needs.c
@@ -550,6 +482,19 @@
 
 /* Denote the finish of VG_(__libc_freeres_wrapper). */
 #define VG_USERREQ__LIBC_FREERES_DONE       0x3029
+#define VG_USERREQ__REGISTER_LIBC_FREERES   0x302A
+
+/* Allocate RT signals */
+#define VG_USERREQ__GET_SIGRT_MIN	    0x302B
+#define VG_USERREQ__GET_SIGRT_MAX	    0x302C
+#define VG_USERREQ__ALLOC_RTSIG		    0x302D
+
+/* Hook for replace_malloc.o to get malloc functions */
+#define VG_USERREQ__GET_MALLOCFUNCS	    0x3030
+
+/* Hook for interface to vg_inject.so */
+#define VG_USERREQ__REGISTER_REDIRECT_SYM   0x3031
+#define VG_USERREQ__REGISTER_REDIRECT_ADDR  0x3032
 
 /* Cosmetic ... */
 #define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101
@@ -566,9 +511,23 @@
 */
 
 
-/* The scheduler does need to know the address of it so it can be
-   called at program exit. */
-extern void VG_(__libc_freeres_wrapper)( void );
+struct vg_mallocfunc_info {
+   /* things vg_replace_malloc.o needs to know about */
+   Addr	sk_malloc;
+   Addr	sk_calloc;
+   Addr	sk_realloc;
+   Addr	sk_memalign;
+   Addr	sk___builtin_new;
+   Addr	sk___builtin_vec_new;
+   Addr	sk_free;
+   Addr	sk___builtin_delete;
+   Addr	sk___builtin_vec_delete;
+
+   Addr	arena_payload_szB;
+
+   Bool	clo_sloppy_malloc;
+   Bool	clo_trace_malloc;
+};
 
 __attribute__((weak))
 int
@@ -1101,6 +1060,13 @@
 extern Int VG_(nanosleep)( const struct vki_timespec *req, 
                            struct vki_timespec *rem );
 
+/* system/mman.h */
+extern void* VG_(mmap)( void* start, UInt length,
+                        UInt prot, UInt flags, UInt fd, UInt offset );
+extern Int  VG_(munmap)( void* start, Int length );
+extern Int  VG_(mprotect)( void *start, Int length, UInt prot );
+
+
 /* Move an fd into the Valgrind-safe range */
 Int VG_(safe_fd)(Int oldfd);
 
@@ -1109,6 +1075,10 @@
 /* --- Connecting over the network --- */
 extern Int VG_(connect_via_socket)( UChar* str );
 
+/* Environment manipulations */
+extern Char* VG_(env_getenv) ( Char **env, Char* varname );
+extern Char **VG_(env_setenv) ( Char ***envp, const Char* varname, const Char *val );
+extern void  VG_(env_unsetenv) ( Char **env, const Char *varname );
 
 /* ---------------------------------------------------------------------
    Exports of vg_message.c
@@ -1318,7 +1288,8 @@
    it's read from the buffer filled by VG_(read_procselfmaps_contents)(). */
 extern 
 void VG_(parse_procselfmaps) (
-   void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* )
+   void (*record_mapping)( Addr addr, UInt len, Char rr, Char ww, Char xx, 
+			   UInt dev, UInt ino, ULong foff, const UChar *filename )
 );
 
 
@@ -1326,32 +1297,65 @@
    Exports of vg_symtab2.c
    ------------------------------------------------------------------ */
 
+typedef struct _Segment Segment;
+
+extern Bool VG_(is_object_file)   ( const void *hdr );
 extern void VG_(mini_stack_dump)  ( Addr eips[], UInt n_eips );
-extern void VG_(read_all_symbols) ( void );
-extern void VG_(read_seg_symbols) ( Addr start, UInt size, 
-                                    Char rr, Char ww, Char xx,
-                                    UInt foffset, UChar* filename );
+extern SegInfo * VG_(read_seg_symbols) ( Segment *seg );
 extern void VG_(unload_symbols)   ( Addr start, UInt length );
+extern void VG_(symtab_incref)	  ( SegInfo * );
+extern void VG_(symtab_decref)	  ( SegInfo *, Addr a, UInt len );
 
 extern Bool VG_(get_fnname_nodemangle)( Addr a, Char* fnname, Int n_fnname );
-extern Int  VG_(setup_code_redirect_table) ( void );
 
-typedef
-   struct {
-      Addr entry_pt_orig;
-      Addr entry_pt_subst;
-   }
-   CodeRedirect;
+/* Set up some default redirects */
+extern void VG_(setup_code_redirect_table) ( void );
 
-#define VG_N_CODE_REDIRECTS 10
-extern CodeRedirect VG_(code_redirect_table)[VG_N_CODE_REDIRECTS];
-/* Table is terminated by a NULL entry_pt_orig field. */
-
+/* Redirection machinery */
+extern void VG_(add_redirect_sym)(const Char *from_lib, const Char *from_sym,
+				  const Char *to_lib, const Char *to_sym);
+extern void VG_(add_redirect_addr)(const Char *from_lib, const Char *from_sym,
+				   Addr to_addr);
+extern Addr VG_(code_redirect)	  (Addr orig);
 
 /* ---------------------------------------------------------------------
    Exports of vg_main.c
    ------------------------------------------------------------------ */
 
+/* structure used for transporting values from stage2 into Valgrind
+   proper */
+typedef struct {
+   Addr	client_esp;		/* initial client ESP			*/
+   Addr client_eip;		/* initial client EIP			*/
+   Char **client_envp;		/* client envp				*/
+   UInt	*client_auxv;		/* client auxv				*/
+   Addr client_brkbase;		/* initial value of brk			*/
+
+   Int	argc;			/* Valgrind's argc/argv			*/
+   Char **argv;
+   const Char *libdir;		/* library directory                    */
+
+   Int  execfd;			/* fd of our own (stage1) executable    */
+
+   Addr client_base;		/* start of client address space	*/
+   Addr	client_end;		/* end of client address space		*/
+   Addr client_mapbase;		/* base address of !MAP_FIXED mappings  */
+   Addr	shadow_base;		/* start of skin's shadow memory	*/
+   Addr shadow_end;		/* end of skin's shadow memory		*/
+   Addr	vg_base;		/* start of Valgrind's memory		*/
+   Addr vg_mmap_end;		/* end of Valgrind's mmap area		*/
+   Addr	vg_end;			/* end of Valgrind's memory		*/
+   Addr	clstk_base;		/* lowest address of client stack	*/
+   Addr	clstk_end;		/* highest address of client stack	*/
+} KickstartParams;
+
+/* Entrypoint for kickstart */
+typedef void (kickstart_main_t)(const KickstartParams *kp, 
+				void (*tool_init)(void), void *tool_dlhandle);
+extern kickstart_main_t VG_(main);
+
+extern void VG_(usage)(void);
+
 /* Is this a SSE/SSE2-capable CPU?  If so, we had better save/restore
    the SSE state all over the place.  This is set up very early, in
    vg_startup.S.  We have to determine it early since we can't even
@@ -1365,6 +1369,26 @@
 /* Sanity checks which may be done at any time.  The scheduler decides when. */
 extern void VG_(do_sanity_checks) ( Bool force_expensive );
 
+/* Address space */
+extern Addr VG_(client_base);	/* client address space limits */
+extern Addr VG_(client_end);
+extern Addr VG_(client_mapbase); /* base of mappings */
+extern Addr VG_(clstk_base);	/* client stack range */
+extern Addr VG_(clstk_end);
+extern Addr VG_(brk_base);	/* start of brk */
+extern Addr VG_(brk_limit);	/* current brk */
+extern Addr VG_(shadow_base);	/* skin's shadow memory */
+extern Addr VG_(shadow_end);
+extern Addr VG_(valgrind_base);	/* valgrind's address range */
+extern Addr VG_(valgrind_mmap_end);
+extern Addr VG_(valgrind_end);
+
+/* stage1 executable file descriptor */
+extern Int  VG_(execfd);
+
+/* Path to all our library/aux files */
+extern const Char *VG_(libdir);
+
 /* A structure used as an intermediary when passing the simulated
    CPU's state to some assembly fragments, particularly system calls.
    Stuff is copied from baseBlock to here, the assembly magic runs,
@@ -1394,16 +1418,16 @@
 extern void VG_(unimplemented) ( Char* msg )
             __attribute__((__noreturn__));
 
-/* The stack on which Valgrind runs.  We can't use the same stack as the
-   simulatee -- that's an important design decision.  */
-extern UInt VG_(stack)[VG_STACK_SIZE_W];
-
 /* Similarly, we have to ask for signals to be delivered on an alternative
    stack, since it is possible, although unlikely, that we'll have to run
    client code from inside the Valgrind-installed signal handler.  If this
    happens it will be done by vg_deliver_signal_immediately(). */
 extern UInt VG_(sigstack)[VG_SIGSTACK_SIZE_W];
 
+/* Valgrind's argc and argv */
+extern Int    VG_(vg_argc);
+extern Char **VG_(vg_argv);
+
 /* Holds client's %esp at the point we gained control.  From this the
    client's argc, argv and envp are deduced. */
 extern Addr   VG_(esp_at_startup);
@@ -1413,11 +1437,9 @@
 extern Bool VG_(sysinfo_page_exists);
 extern Addr VG_(sysinfo_page_addr);
 
-/* Remove valgrind.so and skin's .so from a LD_PRELOAD=... string so child
-   processes don't get traced into.  Also mess up $libdir/valgrind so that
-   our libpthread.so disappears from view. */
-void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
-                                                Char* ld_library_path_str );
+/* Walk through a colon separated list variable, removing entries
+   which match pattern. */
+extern void VG_(mash_colon_env)(Char *varp, const Char *pattern);
 
 /* Something of a function looking for a home ... start up GDB.  This
    is called from VG_(swizzle_esp_then_start_GDB) and so runs on the
@@ -1433,9 +1455,6 @@
 /* Counts downwards in vg_run_innerloop. */
 extern UInt VG_(dispatch_ctr);
 
-/* Is the client running on the simulated CPU or the real one? */
-extern Bool VG_(running_on_simd_CPU); /* Initially False */
-
 /* This is the ThreadId of the last thread the scheduler ran. */
 extern ThreadId VG_(last_run_tid);
 
@@ -1499,27 +1518,73 @@
 UInt VG_(insertDflag)(UInt eflags, Int d);
 Int VG_(extractDflag)(UInt eflags);
 
-/* start address and size of the initial stack */
-extern Addr VG_(foundstack_start);
-extern UInt VG_(foundstack_size);
-
-
 /* ---------------------------------------------------------------------
    Exports of vg_memory.c
    ------------------------------------------------------------------ */
 
+/* A Segment is mapped piece of client memory.  This covers all kinds
+   of mapped memory (exe, brk, mmap, .so, shm, stack, etc)
+
+   We try to encode everything we know about a particular segment here.
+*/
+#define SF_FIXED	(1 <<  0) /* client asked for MAP_FIXED			*/
+#define SF_SHARED	(1 <<  1) /* shared					*/
+#define SF_SHM		(1 <<  2) /* SYSV SHM (also SF_SHARED)			*/
+#define SF_MMAP		(1 <<  3) /* mmap memory				*/
+#define SF_FILE		(1 <<  4) /* mapping is backed by a file		*/
+#define SF_STACK	(1 <<  5) /* is a stack					*/
+#define SF_GROWDOWN	(1 <<  6) /* segment grows down				*/
+#define SF_GROWUP	(1 <<  7) /* segment grows up				*/
+#define SF_EXEC		(1 <<  8) /* segment created by exec			*/
+#define SF_DYNLIB	(1 <<  9) /* mapped from dynamic library		*/
+#define SF_NOSYMS	(1 << 10) /* don't load syms, even if present           */
+#define SF_BRK		(1 << 11) /* brk segment                                */
+#define SF_CORE		(1 << 12) /* allocated by core on behalf of the client  */
+#define SF_VALGRIND	(1 << 13) /* a valgrind-internal mapping - not in client*/
+#define SF_CODE		(1 << 14) /* segment contains cached code               */
+
+struct _Segment {
+   UInt		prot;		/* VKI_PROT_*				*/
+   UInt		flags;		/* SF_*					*/
+
+   Addr		addr;		/* mapped addr (page aligned)		*/
+   UInt		len;		/* size of mapping (page aligned)	*/
+
+   /* These are valid if (flags & SF_FILE) */
+   ULong	offset;		/* file offset				*/
+   const Char	*filename;	/* filename (NULL if unknown)		*/
+   UInt		dev;		/* device				*/
+   UInt		ino;		/* inode				*/
+
+   SegInfo	*symtab;	/* symbol table				*/
+};
+
+/* segment mapped from a file descriptor */
+extern void VG_(map_fd_segment)  (Addr addr, UInt len, UInt prot, UInt flags, 
+				  Int fd, ULong off, const Char *filename);
+
+/* segment mapped from a file */
+extern void VG_(map_file_segment)(Addr addr, UInt len, UInt prot, UInt flags, 
+				  UInt dev, UInt ino, ULong off, const Char *filename);
+
+/* simple segment */
+extern void VG_(map_segment)     (Addr addr, UInt len, UInt prot, UInt flags);
+
+extern void VG_(unmap_range)   (Addr addr, UInt len);
+extern void VG_(mprotect_range)(Addr addr, UInt len, UInt prot);
+extern Addr VG_(find_map_space)(Addr base, UInt len, Bool for_client);
+
+extern Segment *VG_(find_segment)(Addr a);
+extern Segment *VG_(next_segment)(Segment *);
+
+extern Bool     VG_(seg_contains)(const Segment *s, Addr ptr, UInt size);
+extern Bool     VG_(seg_overlaps)(const Segment *s, Addr ptr, UInt size);
+
 extern void VG_(init_memory)        ( void );
-extern void VG_(new_exeseg_startup) ( Addr a, UInt len, Char rr, Char ww,
-                                      Char xx, UInt foffset,
-                                      UChar* filename );
-extern void VG_(new_exeseg_mmap)    ( Addr a, UInt len );
-extern void VG_(remove_if_exeseg)   ( Addr a, UInt len );
 
 extern __attribute__((regparm(1))) 
        void VG_(unknown_esp_update) ( Addr new_ESP );
 
-extern Bool VG_(is_addressable)(Addr p, Int sz);
-
 /* ---------------------------------------------------------------------
    Exports of vg_proxylwp.c
    ------------------------------------------------------------------ */
@@ -1556,7 +1621,7 @@
    Exports of vg_syscalls.c
    ------------------------------------------------------------------ */
 
-extern void VG_(init_dataseg_end_for_brk) ( void );
+extern Char *VG_(resolve_filename)(Int fd);
 
 extern Bool VG_(pre_syscall) ( ThreadId tid );
 extern void VG_(post_syscall)( ThreadId tid );
@@ -1674,18 +1739,21 @@
 extern void VG_(helper_undefined_instruction);
 
 /* NOT A FUNCTION; this is a bogus RETURN ADDRESS. */
-extern void VG_(signalreturn_bogusRA)( void );
+extern Char VG_(signalreturn_bogusRA);
+extern Int  VG_(signalreturn_bogusRA_length);	/* length */
 
 /* ---------------------------------------------------------------------
    Things relating to the used skin
    ------------------------------------------------------------------ */
 
-#define VG_TRACK(fn, args...)          \
-   do {                                \
-      if (VG_(track_events).fn)        \
-         VG_(track_events).fn(args);   \
-   } while (0)
+#define VG_TRACK(fn, args...) 			\
+   do {						\
+      if (VG_(defined_##fn)())			\
+	 SK_(fn)(args);				\
+   } while(0)
 
+__attribute__ ((noreturn))
+extern void VG_(missing_tool_func) ( const Char* fn );
 
 /* ---------------------------------------------------------------------
    The state of the simulated CPU.
diff --git a/coregrind/vg_intercept.c b/coregrind/vg_intercept.c
index 8af508b..bf29bd7 100644
--- a/coregrind/vg_intercept.c
+++ b/coregrind/vg_intercept.c
@@ -33,138 +33,43 @@
 /* ---------------------------------------------------------------------
    ALL THE CODE IN THIS FILE RUNS ON THE SIMULATED CPU.  It is
    intended for various reasons as drop-in replacements for libc
-   functions.  These functions have global visibility (obviously) and
-   have no prototypes in vg_include.h, since they are not intended to
-   be called from within Valgrind.
+   functions.  These functions are not called directly - they're the
+   targets of code redirection.  They're named the same as the library
+   functions they replace so that messages printing their names are
+   sensible, but the we don't really require the dynamic linker to find
+   them.
    ------------------------------------------------------------------ */
 
-/* General idea (2003-Apr-26) is that master implementations of
-   selected functions are done as VGR_(fnname).  Then we route
-   all calls to the master, both here and in vg_libpthread.c.
-   This means we no longer have to rely on the semantics of weak
-   symbols, which seems to have changed in glibc >= 2.3.2 in such
-   a way as to make the previous interception scheme stop working.
-*/
-
 #include "valgrind.h"
 #include "vg_include.h"
-#include "vg_kerneliface.h"
+#include <unistd.h>
+#include <signal.h>
 
-/* This has some nasty duplication of stuff from vg_libpthread.c */
+static void init(void) __attribute__((constructor));
+static int init_done;
 
-#include <errno.h>
-#include <sys/types.h>
-#include <stdio.h>
-#include <sys/ipc.h>
-#include <sys/msg.h>
-#ifdef KERNEL_2_6
-#include <linux/compiler.h>
-#endif
-#include <asm/ipc.h>		/* for ipc_kludge */
-#include <sys/poll.h>
-#include <sys/socket.h>
-#include <sys/uio.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-
-/* --------------------------------------------------------------- */
-
-/* Just start up Valgrind if it's not already going.  VG_(startup)()
-   detects and ignores second and subsequent calls. */
-
-/* We need this guy -- it's in valgrind.so. */
-extern void VG_(startup) ( void );
-
-static __inline__
-void ensure_valgrind ( char* caller )
+int raise(int sig)
 {
-   VG_(startup)();
-}
+   if (!init_done)
+      init();
 
-static __inline__
-int is_kerror ( int res )
+   return kill(getpid(), sig);
+}
+int __libc_raise(int) __attribute__((alias("raise"), visibility("protected")));
+int __GI_raise(int) __attribute__((alias("raise"), visibility("protected")));
+
+/* Don't alias, so there's no chance that "gsignal" will appear in a
+   message instead of "raise" */
+int gsignal(int sig)
 {
-   if (res >= -4095 && res <= -1)
-      return 1;
-   else
-      return 0;
+   raise(sig);
 }
 
-/* --------------------------------------------------------------- */
-
-/* Extract from Valgrind the value of VG_(clo_trace_pthread_level).
-   Returns 0 (none) if not running on Valgrind. */
-static
-int get_pt_trace_level ( void )
-{
-   int res;
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__GET_PTHREAD_TRACE_LEVEL,
-                           0, 0, 0, 0);
-   return res;
-}
-
-static 
-void cat_n_send ( char* pre, char* msg )
-{
-   char  buf[1000];
-   if (get_pt_trace_level() >= 0) {
-      snprintf(buf, sizeof(buf), "%s%s", pre, msg );
-      buf[sizeof(buf)-1] = '\0';
-      VALGRIND_NON_SIMD_CALL2(VG_(message), Vg_UserMsg, buf);
-   }
-}
-
-static
-void my_exit ( int arg )
-{
-   VG_(do_syscall)(__NR_exit_group, arg);
-   VG_(do_syscall)(__NR_exit, arg);
-}
-
-static
-void my_assert_fail ( const Char* expr, const Char* file, Int line, const Char* fn )
-{
-   char buf[1000];
-   static Bool entered = False;
-   if (entered) 
-      my_exit(2);
-   entered = True;
-   sprintf(buf, "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
-                "valgrind", file, line, fn, expr );
-   cat_n_send ( "", buf );
-   sprintf(buf, "Please report this bug at: %s\n\n", VG_BUGS_TO);
-   my_exit(1);
-}
-
-#define MY__STRING(__str)  #__str
-
-#define my_assert(expr)                                               \
-  ((void) ((expr) ? 0 :						      \
-	   (my_assert_fail  (MY__STRING(expr),			      \
-			      __FILE__, __LINE__,                     \
-                              __PRETTY_FUNCTION__), 0)))
-
-/* --------------------------------------------------------------- */
-
-static __inline__
-void __my_pthread_testcancel(void)
-{
-   int res;
-   ensure_valgrind("__my_pthread_testcancel");
-   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
-                           VG_USERREQ__TESTCANCEL,
-                           0, 0, 0, 0);
-   my_assert(res == 0);
-}
-
-
 /* ---------------------------------------------------------------------
    Hook for running __libc_freeres once the program exits.
    ------------------------------------------------------------------ */
 
-void VG_(__libc_freeres_wrapper)( void )
+static void VGINJ_(__libc_freeres_wrapper)( void )
 {
    int res;
 #ifndef __UCLIBC__
@@ -174,18 +79,45 @@
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__LIBC_FREERES_DONE, 0, 0, 0, 0);
    /*NOTREACHED*/
-   vg_assert(12345+54321 == 999999);
+   *(int *)0 = 'x';
 }
 
-/* ---------------------------------------------------------------------
-   Useful for skins that want to replace certain functions
-   ------------------------------------------------------------------ */
+static const struct {
+   const char *fromlib, *fromsym;
+   const void *toaddr;
+} redirects[] = {
+#define _S(x)	#x
+#define S(x)	_S(x)
+#define E(l, pfx, s)	{ "soname:" l, pfx #s, (void *)s }
+#define R(l, s)						\
+   E(l, "", s),						\
+   E(l, "__", s),					\
+   E(l, "__libc_", s),					\
+   E(l, "__GI_", s)
 
-Bool VG_(is_running_on_simd_CPU)(void)
+   R("libc.so.6", raise),
+   R("libc.so.6", gsignal),
+#undef R
+};
+
+static void init(void)
 {
-   return VG_(running_on_simd_CPU);
-}
+   int i;
+   int res;
 
+   if (init_done)
+      return;
+   init_done = 1;
+
+   VALGRIND_MAGIC_SEQUENCE(res, -1, VG_USERREQ__REGISTER_LIBC_FREERES,
+			   (Addr)VGINJ_(__libc_freeres_wrapper), 0, 0, 0);
+
+   for(i = 0; i < sizeof(redirects)/sizeof(*redirects); i++) {
+      VALGRIND_MAGIC_SEQUENCE(res, -1, VG_USERREQ__REGISTER_REDIRECT_ADDR,
+			      redirects[i].fromlib, redirects[i].fromsym,
+			      redirects[i].toaddr, 0);
+   }
+}
 
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c
index 8528496..c13063f 100644
--- a/coregrind/vg_libpthread.c
+++ b/coregrind/vg_libpthread.c
@@ -68,6 +68,7 @@
 #include <stdio.h>
 #include <errno.h>
 
+#include <stdlib.h>
 
 # define strong_alias(name, aliasname) \
   extern __typeof (name) aliasname __attribute__ ((alias (#name)));
@@ -128,40 +129,19 @@
    return res;
 }
 
-static
-void my_exit ( int arg )
-{
-   VG_(do_syscall)(__NR_exit, arg);
-   /*NOTREACHED*/
-}
-
-/* Apparently unused. 
-static
-void my_write ( int fd, const void *buf, int count )
-{
-   VG_(do_syscall)(__NR_write, fd, (int)buf, count );
-}
-*/
-
-/* We need this guy -- it's in valgrind.so. */
-extern void VG_(startup) ( void );
-
-
-/* Just start up Valgrind if it's not already going.  VG_(startup)()
-   detects and ignores second and subsequent calls. */
+/* Don't do anything if we're not under Valgrind */
 static __inline__
 void ensure_valgrind ( char* caller )
 {
-   VG_(startup)();
+   if (!RUNNING_ON_VALGRIND) {
+      const char msg[] = "Warning: this libpthread.so should only be run with Valgrind\n";
+      VG_(do_syscall)(__NR_write, 2, msg, sizeof(msg)-1);
+      VG_(do_syscall)(__NR_exit, 1);
+   }
 }
 
 /* While we're at it ... hook our own startup function into this
    game. */
-__asm__ (
-   ".section .init\n"
-   "\tcall vgPlain_startup"
-);
-
 
 static
 __attribute__((noreturn))
@@ -173,8 +153,8 @@
    strcat(buf, "\nPlease report this bug at: ");
    strcat(buf, VG_BUGS_TO);
    strcat(buf, "\n\n");
-   VALGRIND_NON_SIMD_CALL2(VG_(message), Vg_UserMsg, buf);
-   my_exit(1);
+   VALGRIND_INTERNAL_PRINTF(buf);
+   _exit(1);
    /* We have to persuade gcc into believing this doesn't return. */
    while (1) { };
 }
@@ -186,7 +166,7 @@
    if (get_pt_trace_level() >= 0) {
       snprintf(buf, sizeof(buf), "%s%s%s", s1, s2, s3);
       buf[sizeof(buf)-1] = '\0';
-      VALGRIND_NON_SIMD_CALL2(VG_(message), Vg_UserMsg, buf);
+      VALGRIND_INTERNAL_PRINTF(buf);
    }
 }
 
@@ -223,13 +203,14 @@
    char buf[1000];
    static Bool entered = False;
    if (entered) 
-      my_exit(2);
+      _exit(2);
    entered = True;
    sprintf(buf, "\n%s: %s:%d (%s): Assertion `%s' failed.\n",
                 "valgrind", file, line, fn, expr );
    cat_n_send ( "", buf, "" );
    sprintf(buf, "Please report this bug at: %s\n\n", VG_BUGS_TO);
-   my_exit(1);
+   cat_n_send ( "", buf, "" );
+   _exit(1);
 }
 
 #define MY__STRING(__str)  #__str
@@ -243,10 +224,14 @@
 static
 void my_free ( void* ptr )
 {
+#if 0
    int res;
    VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
                            VG_USERREQ__FREE, ptr, 0, 0, 0);
    my_assert(res == 0);
+#else
+   free(ptr);
+#endif
 }
 
 
@@ -254,8 +239,12 @@
 void* my_malloc ( int nbytes )
 {
    void* res;
+#if 0
    VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
                            VG_USERREQ__MALLOC, nbytes, 0, 0, 0);
+#else
+   res = malloc(nbytes);
+#endif
    my_assert(res != (void*)0);
    return res;
 }
@@ -1033,7 +1022,7 @@
 }
 
 int pthread_cond_init( pthread_cond_t *cond,
-                       const pthread_condattr_t *cond_attr)
+		       const pthread_condattr_t *cond_attr)
 {
    cond->__c_waiting = (_pthread_descr)VG_INVALID_THREADID;
    return 0;
@@ -1278,14 +1267,12 @@
 int sigwait ( const sigset_t* set, int* sig )
 {
    int res;
-   vki_ksiginfo_t si;
+   siginfo_t si;
    
    __my_pthread_testcancel();
 
-   /* As with pthread_sigmask we deliberately confuse sigset_t with
-      vki_ksigset_t. */
    si.si_signo = 0;
-   res = VG_(ksigtimedwait)((const vki_ksigset_t *)set, &si, NULL);
+   res = sigtimedwait(set, &si, NULL);
    *sig = si.si_signo;
 
    return 0;			/* always returns 0 */
@@ -1642,7 +1629,9 @@
 int* __errno_location ( void )
 {
    int tid;
-   /* ensure_valgrind("__errno_location"); */
+   int *ret;
+
+   ensure_valgrind("__errno_location");
    VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */,
                            VG_USERREQ__PTHREAD_GET_THREADID,
                            0, 0, 0, 0);
@@ -1650,8 +1639,11 @@
    if (tid < 1 || tid >= VG_N_THREADS)
       barf("__errno_location: invalid ThreadId");
    if (tid == 1)
-      return &errno;
-   return & thread_specific_errno[tid];
+      ret = &errno;
+   else
+      ret = &thread_specific_errno[tid];
+
+   return ret;
 }
 
 #undef h_errno
@@ -2805,17 +2797,35 @@
    ------------------------------------------------------------------ */
 int __libc_current_sigrtmin (void)
 {
-   return VG_(sig_rtmin);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__GET_SIGRT_MIN,
+			   0, 0, 0, 0);
+
+   return res;
 }
 
 int __libc_current_sigrtmax (void)
 {
-   return VG_(sig_rtmax);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__GET_SIGRT_MAX,
+			   0, 0, 0, 0);
+
+   return res;
 }
 
 int __libc_allocate_rtsig (int high)
 {
-   return VG_(sig_alloc_rtsig)(high);
+   int res;
+
+   VALGRIND_MAGIC_SEQUENCE(res, 0, 
+			   VG_USERREQ__ALLOC_RTSIG,
+			   high, 0, 0, 0);
+
+   return res;
 }
 
 /* ---------------------------------------------------------------------
diff --git a/coregrind/vg_libpthread.vs b/coregrind/vg_libpthread.vs
index 091044e..6248e49 100644
--- a/coregrind/vg_libpthread.vs
+++ b/coregrind/vg_libpthread.vs
@@ -1,22 +1,198 @@
+  GLIBC_2.0 {
+    pthread_join; pthread_self; pthread_equal;
+    pthread_exit; pthread_detach;
 
-GLIBC_2.0 {
-};
+    pthread_getschedparam; pthread_setschedparam;
 
-GLIBC_2.1 {
-} GLIBC_2.0;
+    pthread_attr_destroy;
+    pthread_attr_getdetachstate; pthread_attr_setdetachstate;
+    pthread_attr_getschedparam; pthread_attr_setschedparam;
+    pthread_attr_getschedpolicy; pthread_attr_setschedpolicy;
+    pthread_attr_getinheritsched; pthread_attr_setinheritsched;
+    pthread_attr_getscope; pthread_attr_setscope;
 
-GLIBC_2.2 {
-} GLIBC_2.1;
+    pthread_mutex_init; pthread_mutex_destroy;
+    pthread_mutex_lock; pthread_mutex_trylock; pthread_mutex_unlock;
 
-GLIBC_2.2.3 {
-   __pthread_clock_gettime;
-   __pthread_clock_settime;
-} GLIBC_2.2;
+    pthread_mutexattr_init; pthread_mutexattr_destroy;
 
-GLIBC_2.3.2 {
-} GLIBC_2.2;
+    # Don't version these, because it doesn't matter for Valgrind's libpthread
+    #pthread_cond_init; pthread_cond_destroy;
+    #pthread_cond_wait; pthread_cond_timedwait;
+    #pthread_cond_signal; pthread_cond_broadcast;
 
-GLIBC_PRIVATE {
-   __pthread_clock_gettime;
-   __pthread_clock_settime;
-};
+    pthread_condattr_destroy; pthread_condattr_init;
+
+    pthread_cancel; pthread_testcancel;
+    pthread_setcancelstate; pthread_setcanceltype;
+
+    pthread_sigmask; pthread_kill;
+
+    pthread_key_create; pthread_key_delete;
+    pthread_getspecific; pthread_setspecific;
+
+    pthread_once;
+
+    pthread_atfork;
+
+    flockfile; funlockfile; ftrylockfile;
+
+    # Non-standard POSIX1.x functions.
+    pthread_mutexattr_getkind_np; pthread_mutexattr_setkind_np;
+
+    # Protected names for functions used in other shared objects.
+    __pthread_mutex_init; __pthread_mutex_destroy;
+    __pthread_mutex_lock; __pthread_mutex_trylock; __pthread_mutex_unlock;
+    __pthread_mutexattr_init; __pthread_mutexattr_destroy;
+    __pthread_mutexattr_settype;
+    __pthread_key_create; __pthread_getspecific; __pthread_setspecific;
+    __pthread_once; __pthread_atfork;
+    _IO_flockfile; _IO_ftrylockfile; _IO_funlockfile;
+
+    # Hidden entry point (through macros).
+    #_pthread_cleanup_pop; _pthread_cleanup_pop_restore; _pthread_cleanup_push;
+    #_pthread_cleanup_push_defer;
+
+    # Semaphores.
+    #sem_destroy; sem_getvalue; sem_init; sem_post; sem_trywait; sem_wait;
+
+    # Special fork handling.
+    fork; __fork; vfork;
+
+    # Cancellation points.
+    close; __close; fcntl; __fcntl; read; __read; write; __write; accept;
+    connect; __connect; recv; recvfrom; recvmsg; send; __send; sendmsg; sendto;
+    fsync; lseek; __lseek; msync; nanosleep; open; __open; pause; tcdrain;
+    system; wait; __wait; waitpid;
+
+    # Hidden entry point (through macros).
+    _pthread_cleanup_push; _pthread_cleanup_pop;
+    _pthread_cleanup_push_defer; _pthread_cleanup_pop_restore;
+
+    pthread_kill_other_threads_np;
+
+    # The error functions.
+    __errno_location; __h_errno_location;
+
+    # Functions which previously have been overwritten.
+    sigwait; sigaction; __sigaction; _exit; _Exit; longjmp; siglongjmp;
+    raise;
+  };
+
+  GLIBC_2.1 {
+    pthread_create;
+    pthread_attr_init;
+
+    pthread_attr_getguardsize; pthread_attr_setguardsize;
+    pthread_attr_getstackaddr; pthread_attr_setstackaddr;
+    pthread_attr_getstacksize; pthread_attr_setstacksize;
+
+    pthread_mutexattr_gettype; pthread_mutexattr_settype;
+
+    pthread_rwlock_init; pthread_rwlock_destroy;
+    pthread_rwlock_rdlock; pthread_rwlock_wrlock; pthread_rwlock_unlock;
+    pthread_rwlock_tryrdlock; pthread_rwlock_trywrlock;
+
+    pthread_rwlockattr_init; pthread_rwlockattr_destroy;
+    pthread_rwlockattr_getpshared; pthread_rwlockattr_setpshared;
+    pthread_rwlockattr_getkind_np; pthread_rwlockattr_setkind_np;
+
+    pthread_getconcurrency; pthread_setconcurrency;
+
+    # Semaphores.
+    sem_destroy; sem_getvalue; sem_init; sem_post; sem_trywait; sem_wait;
+
+    __libc_current_sigrtmin; __libc_current_sigrtmax;
+    __libc_allocate_rtsig;
+  } GLIBC_2.0;
+
+  GLIBC_2.1.1 {
+    sem_close; sem_open; sem_unlink;
+  } GLIBC_2.1;
+
+  GLIBC_2.1.2 {
+    __vfork;
+  } GLIBC_2.1.1;
+
+  GLIBC_2.2 {
+    pthread_mutexattr_getpshared; pthread_mutexattr_setpshared;
+
+    pthread_condattr_getpshared; pthread_condattr_setpshared;
+
+    # New functions from IEEE Std. 1003.1-2001.
+    pthread_mutex_timedlock;
+
+    pthread_rwlock_timedrdlock; pthread_rwlock_timedwrlock;
+
+    pthread_attr_getstack; pthread_attr_setstack;
+
+    pthread_spin_destroy; pthread_spin_init; pthread_spin_lock;
+    pthread_spin_trylock; pthread_spin_unlock;
+
+    pthread_barrier_init; pthread_barrier_destroy; pthread_barrier_wait;
+    pthread_barrierattr_destroy; pthread_barrierattr_init;
+    pthread_barrierattr_setpshared;
+
+    sem_timedwait;
+
+    pthread_yield;
+
+    pthread_getcpuclockid;
+
+    # Cancellation points.
+    lseek64; open64; __open64; pread; pread64; __pread64; pwrite; pwrite64;
+    __pwrite64;
+
+    # Names used internally.
+    __pthread_rwlock_init; __pthread_rwlock_destroy;
+    __pthread_rwlock_rdlock; __pthread_rwlock_tryrdlock;
+    __pthread_rwlock_wrlock; __pthread_rwlock_trywrlock;
+    __pthread_rwlock_unlock;
+
+    __res_state;
+  } GLIBC_2.1.2;
+
+  GLIBC_2.2.3 {
+    # Extensions.
+    pthread_getattr_np;
+  } GLIBC_2.2;
+
+  GLIBC_2.2.6 {
+    # Cancellation wrapper
+    __nanosleep;
+  } GLIBC_2.2.3;
+
+  GLIBC_2.3.2 {
+    # Changed pthread_cond_t.
+    # Don't version these, because it doesn't matter for Valgrind's libpthread
+    #pthread_cond_init; pthread_cond_destroy;
+    #pthread_cond_wait; pthread_cond_timedwait;
+    #pthread_cond_signal; pthread_cond_broadcast;
+  } GLIBC_2.2.6;
+
+  GLIBC_2.3.3 {
+    # 1003.1-2001 function accidentally left out in 2.2.
+    pthread_barrierattr_getpshared;
+
+    # Unix CS option.
+    pthread_condattr_getclock; pthread_condattr_setclock;
+
+    # Proposed API extensions.
+    pthread_tryjoin_np; pthread_timedjoin_np;
+
+    # New cancellation cleanup handling.
+    __pthread_register_cancel; __pthread_unregister_cancel;
+    __pthread_register_cancel_defer; __pthread_unregister_cancel_restore;
+    __pthread_unwind_next;
+    __pthread_cleanup_routine;
+
+    # New affinity interfaces.
+    pthread_getaffinity_np; pthread_setaffinity_np;
+    pthread_attr_getaffinity_np; pthread_attr_setaffinity_np;
+  } GLIBC_2.3.2;
+
+  GLIBC_PRIVATE {
+    __pthread_initialize_minimal; __pthread_cleanup_upto;
+    __pthread_clock_gettime; __pthread_clock_settime;
+    __pthread_unwind;
+  };
diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c
index 91c0d11..1585cd0 100644
--- a/coregrind/vg_main.c
+++ b/coregrind/vg_main.c
@@ -120,6 +120,30 @@
 /* This is the actual defn of baseblock. */
 UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS];
 
+/* Client address space */
+Addr VG_(client_base);	/* client address space limits */
+Addr VG_(client_end);
+Addr VG_(client_mapbase);
+Addr VG_(clstk_base);
+Addr VG_(clstk_end);
+Addr VG_(brk_base);	/* start of brk */
+Addr VG_(brk_limit);	/* current brk */
+Addr VG_(shadow_base);	/* skin's shadow memory */
+Addr VG_(shadow_end);
+Addr VG_(valgrind_base);	/* valgrind's address range */
+Addr VG_(valgrind_mmap_end);	/* valgrind's mmaps are between valgrind_base and here */
+Addr VG_(valgrind_end);
+
+/* stage1 (main) executable */
+Int  VG_(execfd) = -1;
+
+/* Path to library directory */
+const Char *VG_(libdir) = VG_LIBDIR;
+
+/* our argc/argv */
+Int  VG_(vg_argc);
+Char **VG_(vg_argv);
+
 /* PID of the main thread */
 Int VG_(main_pid);
 
@@ -200,18 +224,18 @@
 
 Bool VG_(need_to_handle_esp_assignment)(void)
 {
-   return ( VG_(track_events).new_mem_stack_4  ||
-            VG_(track_events).die_mem_stack_4  ||
-            VG_(track_events).new_mem_stack_8  ||
-            VG_(track_events).die_mem_stack_8  ||
-            VG_(track_events).new_mem_stack_12 ||
-            VG_(track_events).die_mem_stack_12 ||
-            VG_(track_events).new_mem_stack_16 ||
-            VG_(track_events).die_mem_stack_16 ||
-            VG_(track_events).new_mem_stack_32 ||
-            VG_(track_events).die_mem_stack_32 ||
-            VG_(track_events).new_mem_stack    ||
-            VG_(track_events).die_mem_stack
+   return ( VG_(defined_new_mem_stack_4)()  ||
+            VG_(defined_die_mem_stack_4)()  ||
+            VG_(defined_new_mem_stack_8)()  ||
+            VG_(defined_die_mem_stack_8)()  ||
+            VG_(defined_new_mem_stack_12)() ||
+            VG_(defined_die_mem_stack_12)() ||
+            VG_(defined_new_mem_stack_16)() ||
+            VG_(defined_die_mem_stack_16)() ||
+            VG_(defined_new_mem_stack_32)() ||
+            VG_(defined_die_mem_stack_32)() ||
+            VG_(defined_new_mem_stack)()    ||
+            VG_(defined_die_mem_stack)()
           );
 }
 
@@ -253,11 +277,11 @@
 
    /* Make these most-frequently-called specialised ones compact, if they
       are used. */
-   if (VG_(track_events).new_mem_stack_4)
-      VG_(register_compact_helper)( (Addr) VG_(track_events).new_mem_stack_4);
+   if (VG_(defined_new_mem_stack_4)())
+      VG_(register_compact_helper)( (Addr) VG_(tool_interface).track_new_mem_stack_4);
 
-   if (VG_(track_events).die_mem_stack_4)
-      VG_(register_compact_helper)( (Addr) VG_(track_events).die_mem_stack_4);
+   if (VG_(defined_die_mem_stack_4)())
+      VG_(register_compact_helper)( (Addr) VG_(tool_interface).track_die_mem_stack_4);
 
    /* (9 or 18) + n_compact_helpers  */
    /* Allocate slots for compact helpers */
@@ -300,9 +324,9 @@
    VG_(register_noncompact_helper)( (Addr) & VG_(do_useseg) );
 
 #define REG(kind, size) \
-   if (VG_(track_events).kind##_mem_stack##size) \
+   if (VG_(defined_##kind##_mem_stack##size)()) \
       VG_(register_noncompact_helper)(           \
-          (Addr) VG_(track_events).kind##_mem_stack##size );
+          (Addr) VG_(tool_interface).track_##kind##_mem_stack##size );
 
    REG(new, _8);
    REG(new, _12);
@@ -427,10 +451,6 @@
    Global entities which are not referenced from generated code.
    ------------------------------------------------------------------ */
 
-/* The stack on which Valgrind runs.  We can't use the same stack as
-   the simulatee -- that's an important design decision.  */
-UInt VG_(stack)[VG_STACK_SIZE_W];
-
 /* Ditto our signal delivery stack. */
 UInt VG_(sigstack)[VG_SIGSTACK_SIZE_W];
 
@@ -562,6 +582,7 @@
 Bool   VG_(clo_track_fds)      = False;
 Bool   VG_(clo_chain_bb)       = True;
 Bool   VG_(clo_show_below_main) = False;
+Bool   VG_(clo_pointercheck)   = True;
 
 static Bool   VG_(clo_wait_for_gdb)   = False;
 
@@ -592,10 +613,6 @@
 Char** VG_(client_argv);
 Char** VG_(client_envp);
 
-/* A place into which to copy the value of env var VG_ARGS, so we
-   don't have to modify the original. */
-static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN];
-
 /* ---------------------------------------------------------------------
    Processing of command-line options.
    ------------------------------------------------------------------ */
@@ -615,23 +632,13 @@
    VG_(clo_log_to)     = VgLogTo_Fd;
    VG_(clo_logfile_fd) = 2; /* stderr */
    VG_(printf)(
-      "valgrind.so: Startup or configuration error:\n   %s\n", msg);
+      "valgrind: Startup or configuration error:\n   %s\n", msg);
    VG_(printf)(
-      "valgrind.so: Unable to start up properly.  Giving up.\n");
+      "valgrind: Unable to start up properly.  Giving up.\n");
    VG_(exit)(1);
 }
 
-static void args_grok_error ( Char* msg )
-{
-   VG_(shutdown_logging)();
-   VG_(clo_log_to)     = VgLogTo_Fd;
-   VG_(clo_logfile_fd) = 2; /* stderr */
-   VG_(printf)("valgrind.so: When searching for "
-               "client's argc/argc/envp:\n\t%s\n", msg);
-   config_error("couldn't find client's argc/argc/envp");
-}   
-
-static void usage ( void )
+void VG_(usage) ( void )
 {
    Char* usage1 = 
 "usage: valgrind [options] prog-and-args\n"
@@ -656,6 +663,7 @@
 "			       a signal [no]\n"
 "    --lowlat-syscalls=no|yes  improve wake-up latency when a thread's\n"
 "			       syscall completes [no]\n"
+"    --pointercheck=no|yes     enforce client address space limits [yes]\n"
 "\n"
 "  user options for Valgrind tools that report errors:\n"
 "    --logfile-fd=<number>     file descriptor for messages [2=stderr]\n"
@@ -671,8 +679,7 @@
 "    --gdb-attach=no|yes       start GDB when errors detected? [no]\n"
 "    --gdb-path=/path/to/gdb   path to the GDB to use [/usr/bin/gdb]\n"
 "    --input-fd=<number>       file descriptor for (gdb) input [0=stdin]\n"
-"\n"
-"  user options for %s:\n";
+"\n";
 
    Char* usage2 = 
 "\n"
@@ -695,8 +702,7 @@
 "  debugging options for Valgrind tools that report errors\n"
 "    --dump-error=<number>     show translation for basic block associated\n"
 "                              with <number>'th error context [0=show none]\n"
-"\n"
-"  debugging options for %s:\n";
+"\n";
 
    Char* usage3 =
 "\n"
@@ -710,17 +716,25 @@
 "  tool's start-up message for more information.\n"
 "\n";
 
-   VG_(printf)(usage1, VG_(details).name);
-   /* Don't print skin string directly for security, ha! */
-   if (VG_(needs).command_line_options)
-      SK_(print_usage)();
-   else
-      VG_(printf)("    (none)\n");
-   VG_(printf)(usage2, VG_(details).name);
-   if (VG_(needs).command_line_options)
-      SK_(print_debug_usage)();
-   else
-      VG_(printf)("    (none)\n");
+   VG_(printf)(usage1);
+   if (VG_(details).name) {
+      VG_(printf)("  user options for %s:\n", VG_(details).name);
+      /* Don't print skin string directly for security, ha! */
+      if (VG_(needs).command_line_options)
+	 SK_(print_usage)();
+      else
+	 VG_(printf)("    (none)\n");
+   }
+   VG_(printf)(usage2);
+
+   if (VG_(details).name) {
+      VG_(printf)("  debugging options for %s:\n", VG_(details).name);
+   
+      if (VG_(needs).command_line_options)
+	 SK_(print_debug_usage)();
+      else
+	 VG_(printf)("    (none)\n");
+   }
    VG_(printf)(usage3, VG_BUGS_TO);
 
    VG_(shutdown_logging)();
@@ -730,37 +744,12 @@
 }
 
 
-/* Callback for looking for the stack segment. */
-Addr VG_(foundstack_start) = (Addr)NULL;
-UInt VG_(foundstack_size)  = 0;
-
-static void vg_findstack_callback ( Addr start, UInt size, 
-                                    Char r, Char w, Char x, 
-                                    UInt foffset, UChar* filename )
+static void process_cmd_line_options ( const KickstartParams *kp )
 {
-   Addr lastword;
-   if (size == 0) return;
-   if (r != 'r' || w != 'w' 
-       /* || x != 'x'  --not necessarily so on x86-64*/
-      ) return;
-   lastword = start + size - 4;
-   if (start <= VG_(esp_at_startup) 
-       && VG_(esp_at_startup) <= lastword) {
-      VG_(foundstack_start) = start;
-      VG_(foundstack_size) = size;
-      vg_assert(VG_(foundstack_size) > 0);
-   }
-}
-
-
-
-static void process_cmd_line_options ( void )
-{
-   Char* argv[M_VG_CMDLINE_OPTS];
-   Int   argc;
-   Char* p;
-   Char* str;
-   Int   i, eventually_logfile_fd, ctr;
+   Int argc;
+   Char **argv;
+   Int   i, eventually_logfile_fd;
+   Int	*auxp;
 
 #  define ISSPACE(cc)      ((cc) == ' ' || (cc) == '\t' || (cc) == '\n')
 
@@ -772,188 +761,42 @@
    VG_(startup_logging)();
 
    /* Check for sane path in ./configure --prefix=... */
-   if (VG_(strlen)(VG_LIBDIR) < 1 
-       || VG_LIBDIR[0] != '/') 
+   if (VG_LIBDIR[0] != '/') 
      config_error("Please use absolute paths in "
                   "./configure --prefix=... or --libdir=...");
 
-   /* (Suggested by Fabrice Bellard ... )
-      We look for the Linux ELF table and go down until we find the
-      envc & envp. It is not fool-proof, but these structures should
-      change less often than the libc ones. */
-   {
-       Int* sp;
-
-       /* Look for the stack segment by parsing /proc/self/maps and
-	  looking for a section bracketing VG_(esp_at_startup) which
-	  has rwx permissions and no associated file.  Note that this uses
-          the /proc/self/maps contents read at the start of VG_(main)(),
-          and doesn't re-read /proc/self/maps. */
-
-       VG_(parse_procselfmaps)( vg_findstack_callback );
-
-       /* Now foundstack_start and foundstack_size should delimit the stack. */
-       if (VG_(foundstack_size) == 0) {
-          args_grok_error("Cannot determine stack segment "
-                          "from /proc/self/maps");
-       }
-
-       if (0)
-          VG_(printf)("stack segment is %p .. %p\n", 
-                      VG_(foundstack_start), 
-                      VG_(foundstack_start) + VG_(foundstack_size) - 4 );
-
-       sp = (UInt*)(VG_(foundstack_start) + VG_(foundstack_size) );
-       if ((((UInt)(sp)) % VKI_BYTES_PER_PAGE) != 0) {
-          args_grok_error("Stack segment is not page aligned?!");
-       }
-
-       /* we locate: NEW_AUX_ENT(1, AT_PAGESZ, ELF_EXEC_PAGESIZE) in
-          the elf interpreter table */
-
-       sp -= 2;
-       while (sp[0] != VKI_AT_PAGESZ || sp[1] != 4096) {
-           /* VG_(printf)("trying %p\n", sp); */
-           sp--;
-       }
-
-       if (sp[2] == VKI_AT_BASE 
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_PHNUM
-           && sp[-4] == VKI_AT_PHENT
-           && sp[-6] == VKI_AT_PHDR
-           && sp[-6-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like you've got a 2.2.X kernel here.\n");
-          sp -= 6;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-2-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like you've got a 2.4.X kernel here.\n");
-          sp -= 2;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-4] == VKI_AT_SYSINFO
-           && sp[-4-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like you've got a 2.4.X kernel with "
-                         "a sysinfo page at %x here.\n", sp[-3]);
-	  VG_(sysinfo_page_exists) = True;
-	  VG_(sysinfo_page_addr) = sp[-3];
-          sp -= 4;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-4] == VKI_AT_USER_AUX_SEGMENT
-           && sp[-4-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like you've got a R-H Limbo 2.4.X "
-                         "kernel here.\n");
-          sp -= 4;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-2-20-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like you've got a early 2.4.X kernel here.\n");
-          sp -= 22;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-4-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like a 2.5.43-2.5.67 kernel here.\n");
-          sp -= 4;
-       } else
-       if (sp[2] == VKI_AT_CLKTCK
-           && sp[0] == VKI_AT_PAGESZ
-           && sp[-2] == VKI_AT_HWCAP
-           && sp[-6] == VKI_AT_SYSINFO
-           && sp[-6-1] == 0) {
-          if (0)
-             VG_(printf)("Looks like a >= 2.5.68 kernel with "
-                         "a sysinfo page at %x here.\n", sp[-5]);
-	  VG_(sysinfo_page_exists) = True;
-	  VG_(sysinfo_page_addr) = sp[-5];
-          sp -= 6;
-       } else
-         args_grok_error(
-            "ELF frame does not look like 2.2.X or 2.4.X.\n   "
-            "See kernel sources linux/fs/binfmt_elf.c to make sense of this."
-         );
-
-       sp--;
-       if (*sp != 0)
-	 args_grok_error("can't find NULL at end of env[]");
-
-       /* sp now points to NULL at the end of env[] */
-       ctr = 0;
-       while (True) {
-           sp --;
-           if (*sp == 0) break;
-           if (++ctr >= 2000)
-              args_grok_error(
-                 "suspiciously many (2000) env[] entries; giving up");
-           
-       }
-       /* sp now points to NULL at the end of argv[] */
-       VG_(client_envp) = (Char**)(sp+1);
-
-       ctr = 0;
-       VG_(client_argc) = 0;
-       while (True) {
-          sp--;
-          if (*sp == VG_(client_argc))
-             break;
-          VG_(client_argc)++;
-           if (++ctr >= 1000)
-              args_grok_error(
-                 "suspiciously many (1000) argv[] entries; giving up");
-       }
-
-       VG_(client_argv) = (Char**)(sp+1);
-   }
-
-   /* Now that VG_(client_envp) has been set, we can extract the args
-      for Valgrind itself.  Copy into global var so that we don't have to
-      write zeroes to the getenv'd value itself. */
-   str = VG_(getenv)("VG_ARGS");
-   argc = 0;
-
-   if (!str) {
-      config_error("Can't read options from env var VG_ARGS.");
-   }
-
-   if (VG_(strlen)(str) >= M_VG_CMDLINE_STRLEN-1) {
-      config_error("Command line length exceeds M_CMDLINE_STRLEN.");
-   }
-   VG_(strcpy)(vg_cmdline_copy, str);
-   str = NULL;
-
-   p = &vg_cmdline_copy[0];
-   while (True) {
-      while (ISSPACE(*p)) { *p = 0; p++; }
-      if (*p == 0) break;
-      if (argc < M_VG_CMDLINE_OPTS-1) { 
-         argv[argc] = p; argc++; 
-      } else {
-         config_error(
-            "Found more than M_CMDLINE_OPTS command-line opts.");
+   for(auxp = kp->client_auxv; auxp[0] != VKI_AT_NULL; auxp += 2) {
+      switch(auxp[0]) {
+      case VKI_AT_SYSINFO:
+	 VG_(sysinfo_page_exists) = True;
+	 VG_(sysinfo_page_addr) = auxp[1];
+	 break;
       }
-      while (*p != 0 && !ISSPACE(*p)) p++;
-   }
+   } 
 
-   for (i = 0; i < argc; i++) {
+   VG_(client_envp) = kp->client_envp;
 
-      if      (VG_CLO_STREQ(argv[i], "-v") ||
+   argc = kp->argc;
+   argv = kp->argv;
+
+   VG_(vg_argc) = argc;
+   VG_(vg_argv) = argv;
+
+   /* We know the initial ESP is pointing at argc/argv */
+   VG_(client_argc) = *(Int *)kp->client_esp;
+   VG_(client_argv) = (Char **)(kp->client_esp + sizeof(Int));
+
+   for (i = 1; i < argc; i++) {
+      /* Ignore these options - they've already been handled */
+      if (VG_CLO_STREQN(7, argv[i], "--tool=") ||
+	  VG_CLO_STREQN(7, argv[i], "--skin="))
+	 continue;
+      if (VG_CLO_STREQN(7, argv[i], "--exec="))
+	 continue;
+
+      if (     VG_CLO_STREQ(argv[i], "--"))
+	 continue;
+      else if (VG_CLO_STREQ(argv[i], "-v") ||
                VG_CLO_STREQ(argv[i], "--verbose"))
          VG_(clo_verbosity)++;
       else if (VG_CLO_STREQ(argv[i], "-q") ||
@@ -983,6 +826,11 @@
       else if (VG_CLO_STREQ(argv[i], "--show-below-main=no"))
          VG_(clo_show_below_main) = False;
 
+      else if (VG_CLO_STREQ(argv[i], "--pointercheck=yes"))
+         VG_(clo_pointercheck) = True;
+      else if (VG_CLO_STREQ(argv[i], "--pointercheck=no"))
+         VG_(clo_pointercheck) = False;
+
       else if (VG_CLO_STREQ(argv[i], "--demangle=yes"))
          VG_(clo_demangle) = True;
       else if (VG_CLO_STREQ(argv[i], "--demangle=no"))
@@ -1142,10 +990,10 @@
       else if (VG_(needs).command_line_options) {
          Bool ok = SK_(process_cmd_line_option)(argv[i]);
          if (!ok)
-            usage();
+            VG_(usage)();
       }
       else
-         usage();
+         VG_(usage)();
    }
 
 #  undef ISSPACE
@@ -1294,19 +1142,27 @@
    if (VG_(clo_verbosity) > 1) {
       if (VG_(clo_log_to) != VgLogTo_Fd)
          VG_(message)(Vg_UserMsg, "");
+      VG_(message)(Vg_UserMsg, "Valgrind library directory: %s", VG_(libdir));
       VG_(message)(Vg_UserMsg, "Command line");
       for (i = 0; i < VG_(client_argc); i++)
          VG_(message)(Vg_UserMsg, "   %s", VG_(client_argv)[i]);
 
       VG_(message)(Vg_UserMsg, "Startup, with flags:");
-      for (i = 0; i < argc; i++) {
+      for (i = 1; i < argc; i++) {
          VG_(message)(Vg_UserMsg, "   %s", argv[i]);
       }
    }
 
-   if (VG_(clo_n_suppressions) == 0 && 
+   if (VG_(clo_n_suppressions) < VG_CLO_MAX_SFILES-1 &&
        (VG_(needs).core_errors || VG_(needs).skin_errors)) {
-      config_error("No error-suppression files were specified.");
+      /* If there are no suppression files specified and the skin
+	 needs one, load the default */
+      static const Char default_supp[] = "default.supp";
+      Int len = VG_(strlen)(VG_(libdir)) + 1 + sizeof(default_supp);
+      Char *buf = VG_(arena_malloc)(VG_AR_CORE, len);
+      VG_(sprintf)(buf, "%s/%s", VG_(libdir), default_supp);
+      VG_(clo_suppressions)[VG_(clo_n_suppressions)] = buf;
+      VG_(clo_n_suppressions)++;
    }
 
    if (VG_(clo_gen_suppressions) && 
@@ -1421,30 +1277,6 @@
    return VG_(baseBlock)[VGOFF_(m_esp)];
 }
 
-/* Some random tests needed for leak checking */
-
-Bool VG_(within_stack)(Addr a)
-{
-   if (a >= ((Addr)(&VG_(stack)))
-       && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack)))
-      return True;
-   else
-      return False;
-}
-
-Bool VG_(within_m_state_static_OR_threads)(Addr a)
-{
-   if (a >= ((Addr)(&VG_(m_state_static)))
-       && a < ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static)))
-      return True;
-
-   if (a >= ((Addr)(&VG_(threads)[0]))
-       && a < ((Addr)(&VG_(threads)[VG_N_THREADS])))
-      return True;
-
-   return False;
-}
-
 /* ---------------------------------------------------------------------
    Show accumulated counts.
    ------------------------------------------------------------------ */
@@ -1519,11 +1351,47 @@
 /* Where we jump to once Valgrind has got control, and the real
    machine's state has been copied to the m_state_static. */
 
-void VG_(main) ( void )
+void VG_(main) ( const KickstartParams *kp, void (*tool_init)(void), void *tool_dlhandle )
 {
-   Int               i;
    VgSchedReturnCode src;
 
+   /* initial state */
+   if (0)
+      VG_(printf)("starting esp=%p eip=%p, esp=%p\n", kp->client_esp, kp->client_eip, &src);
+   VG_(esp_at_startup) = kp->client_esp;
+   VG_(memset)(&VG_(m_state_static), 0, sizeof(VG_(m_state_static)));
+   VG_(m_state_static)[40/4] = kp->client_esp;
+   VG_(m_state_static)[60/4] = kp->client_eip;
+
+   /* set up an initial FPU state (doesn't really matter what it is,
+      so long as it's somewhat valid) */
+   if (!VG_(have_ssestate))
+	   asm volatile("fwait; fnsave %0; fwait; frstor %0; fwait" 
+			: : "m" (VG_(m_state_static)[64/4]) : "cc", "memory");
+   else
+	   asm volatile("fwait; fxsave %0; fwait; andl $0xffbf, %1; fxrstor %0; fwait"
+			: : "m" (VG_(m_state_static)[64/4]), "m" (VG_(m_state_static)[(64+24)/4]) : "cc", "memory");
+
+   VG_(brk_base)          = VG_(brk_limit) = kp->client_brkbase;
+   VG_(client_base)       = kp->client_base;
+   VG_(client_end)        = kp->client_end;
+   VG_(client_mapbase)    = kp->client_mapbase;
+   VG_(clstk_base)        = kp->clstk_base;
+   VG_(clstk_end)         = kp->clstk_end;
+
+   VG_(shadow_base)	  = kp->shadow_base;
+   VG_(shadow_end)	  = kp->shadow_end;
+   VG_(valgrind_base)	  = kp->vg_base;
+   VG_(valgrind_mmap_end) = kp->vg_mmap_end;
+   VG_(valgrind_end)	  = kp->vg_end;
+
+   VG_(libdir)            = kp->libdir;
+
+   vg_assert(VG_(clstk_end) == VG_(client_end));
+
+   if (kp->execfd != -1)
+      VG_(execfd) = VG_(safe_fd)(kp->execfd);
+
    if (0) {
       if (VG_(have_ssestate))
          VG_(printf)("Looks like a SSE-capable CPU\n");
@@ -1531,36 +1399,9 @@
          VG_(printf)("Looks like a MMX-only CPU\n");
    }
 
-   /* Check skin and core versions are compatible */
-   if (VG_CORE_INTERFACE_MAJOR_VERSION != VG_(skin_interface_major_version)) {
-      VG_(printf)("Error:\n"
-                  "  Tool and core interface versions do not match.\n"
-                  "  Interface version used by core is: %d.%d\n"
-                  "  Interface version used by tool is: %d.%d\n"
-                  "  The major version numbers must match.\n",
-                  VG_CORE_INTERFACE_MAJOR_VERSION,
-                  VG_CORE_INTERFACE_MINOR_VERSION,
-                  VG_(skin_interface_major_version),
-                  VG_(skin_interface_minor_version));
-      VG_(printf)("  You need to at least recompile, and possibly update,\n");
-      if (VG_CORE_INTERFACE_MAJOR_VERSION > VG_(skin_interface_major_version))
-         VG_(printf)("  your skin to work with this version of Valgrind.\n");
-      else
-         VG_(printf)("  your version of Valgrind to work with this skin.\n");
-      VG_(printf)("  Aborting, sorry.\n");
-      VG_(exit)(1);
-   }
-
    VG_(atfork)(NULL, NULL, newpid);
    newpid(VG_INVALID_THREADID);
 
-   /* Set up our stack sanity-check words. */
-   for (i = 0; i < 10; i++) {
-      VG_(stack)[i] = (UInt)(&VG_(stack)[i])                   ^ 0xA4B3C2D1;
-      VG_(stack)[VG_STACK_SIZE_W-1-i] 
-                    = (UInt)(&VG_(stack)[VG_STACK_SIZE_W-i-1]) ^ 0xABCD4321;
-   }
-
    /* Read /proc/self/maps into a buffer.  Must be before:
       - SK_(pre_clo_init)(): so that if it calls VG_(malloc)(), any mmap'd
         superblocks are not erroneously identified as being owned by the
@@ -1578,11 +1419,13 @@
         and turn on/off 'command_line_options' need
       - init_memory() (to setup memory event trackers).
    */
-   SK_(pre_clo_init)();
+   (*tool_init)();
+   VG_(tool_init_dlsym)(tool_dlhandle);
+
    VG_(sanity_check_needs)();
 
-   /* Process Valgrind's command-line opts (from env var VG_ARGS). */
-   process_cmd_line_options();
+   /* Process Valgrind's command-line opts */
+   process_cmd_line_options(kp);
 
    /* Hook to delay things long enough so we can get the pid and
       attach GDB in another shell. */
@@ -1657,6 +1500,27 @@
 
    VG_(bbs_to_go) = VG_(clo_stop_after);
 
+   if (VG_(clo_pointercheck)) {
+      vki_modify_ldt_t ldt = { VG_POINTERCHECK_SEGIDX,
+			       VG_(client_base),
+			       (VG_(client_end)-VG_(client_base)) / VKI_BYTES_PER_PAGE,
+			       1,		/* 32 bit */
+			       0,		/* contents: data, RW, non-expanding */
+			       0,		/* not read-exec only */
+			       1,		/* limit in pages */
+			       0,		/* !seg not present */
+			       1,		/* usable */
+      };
+      Int ret = VG_(do_syscall)(__NR_modify_ldt, 1, &ldt, sizeof(ldt));
+
+      if (ret < 0) {
+	 VG_(message)(Vg_UserMsg,
+		      "Warning: ignoring --pointercheck=yes, "
+		      "because modify_ldt failed (errno=%d)", -ret);
+	 VG_(clo_pointercheck) = False;
+      }
+   }
+
    /* Run! */
    VG_(running_on_simd_CPU) = True;
    VGP_PUSHCC(VgpSched);
@@ -1711,20 +1575,6 @@
 
    VG_(shutdown_logging)();
 
-   /* Remove valgrind.so from a LD_PRELOAD=... string so child
-      processes don't get traced into.  Also mess up $libdir/valgrind
-      so that our libpthread.so disappears from view. */
-   /* 26 Apr 03: doing this often causes trouble for no reason, and is
-      pointless when we are just about to VgSrc_ExitSyscall.  So don't
-      bother in that case. */
-   if ((!VG_(clo_trace_children))
-       && src != VgSrc_ExitSyscall) { 
-      VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
-         VG_(getenv)("LD_PRELOAD"),
-         VG_(getenv)("LD_LIBRARY_PATH") 
-      );
-   }
-
    /* We're exiting, so nuke all the threads and clean up the proxy LWPs */
    vg_assert(src == VgSrc_FatalSig ||
 	     VG_(threads)[VG_(last_run_tid)].status == VgTs_Runnable ||
@@ -1794,150 +1644,66 @@
 }
 
 
-/* Find "valgrind.so" in a LD_PRELOAD=... string, and convert it to
-   "valgrinq.so", which doesn't do anything.  This is used to avoid
-   tracing into child processes.  To make this work the build system
-   also supplies a dummy file, "valgrinq.so". 
+/* Walk through a colon-separated environment variable, and remove the
+   entries which match remove_pattern.  It slides everything down over
+   the removed entries, and pads the remaining space with '\0'.  It
+   modifies the entries in place (in the client address space), but it
+   shouldn't matter too much, since we only do this just before an
+   execve().
 
-   Also replace "vgskin_<foo>.so" with whitespace, for the same reason;
-   without it, child processes try to find valgrind.so symbols in the 
-   skin .so.
-
-   Also look for $(libdir)/lib/valgrind in LD_LIBRARY_PATH and change
-   it to $(libdir)/lib/valgrinq, so as to make our libpthread.so
-   disappear.  
+   This is also careful to mop up any excess ':'s, since empty strings
+   delimited by ':' are considered to be '.' in a path.
 */
-static void slideleft ( Char* s )
+void VG_(mash_colon_env)(Char *varp, const Char *remove_pattern)
 {
-   vg_assert(s && (*s == ' ' || *s == ':'));
-   while (True) {
-      s[0] = s[1];
-      if (s[0] == '\0') break;
-      s++;
-   }
-}
+   Char *const start = varp;
+   Char *entry_start = varp;
+   Char *output = varp;
 
-
-void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str,
-                                                Char* ld_library_path_str )
-{
-   Char* vg_prel  = NULL;
-   Char* sk_prel  = NULL;
-   Char* coredir2 = NULL;
-   Char* p;
-   Char* coredir_first;
-   Char* coredir_last;
-   Int   coredir_len;
-   Int   i;
-   Int   what;
-
-#define MUTANCY(n)   { what = n; goto mutancy; }
-
-   if (ld_preload_str == NULL || ld_library_path_str == NULL) MUTANCY(0);
-
-   /* VG_(printf)("pre:\n%s\n%s\n", ld_preload_str, ld_library_path_str); */
-
-   /* LD_PRELOAD      = "<skindir>/vgskin_foo.so:<coredir>/valgrind.so:X"
-      LD_LIBRARY_PATH = "<coredir>:Y"  */
-
-   /* Setting up, finding things */
-
-   /* LD_PRELOAD: Search for "valgrind.so" */
-   vg_prel = VG_(strstr)(ld_preload_str, "valgrind.so");
-
-   /* LD_PRELOAD: if "valgrind.so" not found, has been done before;
-      "valgrinq.so" should be there instead.  Then stop. */
-   if (NULL == vg_prel) {
-      if (VG_(strstr)(ld_preload_str, "valgrinq.so") == NULL) MUTANCY(1);
+   if (varp == NULL)
       return;
+
+   while(*varp) {
+      if (*varp == ':') {
+	 Char prev;
+	 Bool match;
+
+	 /* This is a bit subtle: we want to match against the entry
+	    we just copied, because it may have overlapped with
+	    itself, junking the original. */
+
+	 prev = *output;
+	 *output = '\0';
+
+	 match = VG_(string_match)(remove_pattern, entry_start);
+
+	 *output = prev;
+	 
+	 if (match) {
+	    output = entry_start;
+	    varp++;			/* skip ':' after removed entry */
+	 } else
+	    entry_start = output+1;	/* entry starts after ':' */
+      }
+
+      *output++ = *varp++;
    }
 
-   /* LD_PRELOAD: find start of <coredir> */
-   p = vg_prel;
+   /* match against the last entry */
+   if (VG_(string_match)(remove_pattern, entry_start)) {
+      output = entry_start;
+      if (output > start) {
+	 /* remove trailing ':' */
+	 output--;
+	 vg_assert(*output == ':');
+      }
+   }	 
 
-   for (p = vg_prel;  *p != ':' && p > ld_preload_str;  p--) { }
-   if (*p != ':') MUTANCY(2);  /* skin.so entry must precede it */
-   coredir_first = p+1;
-   coredir_last  = vg_prel - 1;
-   coredir_len   = coredir_last - coredir_first;
-   
-   /* LD_PRELOAD: find "vgskin_foo.so" */
-   sk_prel = VG_(strstr)(ld_preload_str, "vgskin_");
-   if (sk_prel == NULL) MUTANCY(4);
-
-   /* LD_LIBRARY_PATH: find <coredir> */
-   *coredir_last = '\0';      /* Temporarily zero-terminate coredir */
-   coredir2 = VG_(strstr)(ld_library_path_str, coredir_first);
-   if (coredir2 == NULL) MUTANCY(5);
-   *coredir_last = '/';       /* Undo zero-termination */
-
-   /* Changing things */
-
-   /* LD_PRELOAD: "valgrind.so" --> "valgrinq.so" */
-   if (vg_prel[7] != 'd') MUTANCY(6);
-   vg_prel[7] = 'q';
-
-   /* LD_PRELOAD: "<skindir>/vgskin_foo.so:<coredir>/valgrinq.so:X" -->
-                  "          vgskin_foo.so:<coredir>/valgrinq.so:X" */
-   p = sk_prel-1;
-   while (*p != ':' && p >= ld_preload_str) { 
-      *p = ' ';
-      p--;
-   }
-   /* LD_PRELOAD: "          vgskin_foo.so:<coredir>/valgrinq.so:X" -->
-                  "                       :<coredir>/valgrinq.so:X" */
-   p = sk_prel;
-   while (*p != ':' && *p != '\0') { 
-      *p = ' ';
-      p++;
-   }
-   if (*p == '\0') MUTANCY(7);    /* valgrind.so has disappeared?! */
-
-   /* LD_LIBRARY_PATH: "<coredir>:Y" --> "         :Y"  */
-   for (i = 0; i < coredir_len; i++)
-      coredir2[i] = ' ';
-   
-   /* Zap the leading spaces and : in both strings. */
-   while (ld_preload_str[0] == ' ') slideleft(ld_preload_str);
-   if    (ld_preload_str[0] == ':') slideleft(ld_preload_str);
-
-   while (ld_library_path_str[0] == ' ') slideleft(ld_library_path_str);
-   if    (ld_library_path_str[0] == ':') slideleft(ld_library_path_str);
-
-   /* VG_(printf)("post:\n%s\n%s\n", ld_preload_str, ld_library_path_str); */
-
-   return;
-
-
-mutancy:
-   VG_(printf)(
-      "\nVG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH): internal error:\n"
-      "   what                = %d\n"
-      "   ld_preload_str      = `%s'\n"
-      "   ld_library_path_str = `%s'\n"
-      "   vg_prel             = `%s'\n"
-      "   sk_prel             = `%s'\n"
-      "   coredir2            = `%s'\n"
-      "   VG_LIBDIR           = `%s'\n",
-      what, ld_preload_str, ld_library_path_str, 
-      vg_prel, sk_prel, coredir2, VG_LIBDIR 
-   );
-   VG_(printf)(
-      "\n"
-      "Note that this is often caused by mis-installation of valgrind.\n"
-      "Correct installation procedure is:\n"
-      "   ./configure --prefix=/install/dir\n"
-      "   make install\n"
-      "And then use /install/dir/bin/valgrind\n"
-      "Moving the installation directory elsewhere after 'make install'\n"
-      "will cause the above error.  Hand-editing the paths in the shell\n"
-      "scripts is also likely to cause problems.\n"
-      "\n"
-   );
-   VG_(core_panic)("VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) failed\n");
+   /* pad out the left-overs with '\0' */
+   while(output < varp)
+      *output++ = '\0';
 }
 
-
 /* RUNS ON THE CLIENT'S STACK, but on the real CPU.  Start GDB and get
    it to attach to this process.  Called if the user requests this
    service after an error has been shown, so she can poke around and
@@ -2001,8 +1767,6 @@
 
 void VG_(do_sanity_checks) ( Bool force_expensive )
 {
-   Int          i;
-
    VGP_PUSHCC(VgpCoreCheapSanity);
 
    if (VG_(sanity_level) < 1) return;
@@ -2011,14 +1775,6 @@
 
    VG_(sanity_fast_count)++;
 
-   /* Check that we haven't overrun our private stack. */
-   for (i = 0; i < 10; i++) {
-      vg_assert(VG_(stack)[i]
-                == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1));
-      vg_assert(VG_(stack)[VG_STACK_SIZE_W-1-i] 
-                == ((UInt)(&VG_(stack)[VG_STACK_SIZE_W-i-1]) ^ 0xABCD4321));
-   }
-
    /* Check stuff pertaining to the memory check system. */
 
    /* Check that nobody has spuriously claimed that the first or
diff --git a/coregrind/vg_malloc2.c b/coregrind/vg_malloc2.c
index fe8fdeb..5f24c78 100644
--- a/coregrind/vg_malloc2.c
+++ b/coregrind/vg_malloc2.c
@@ -35,6 +35,68 @@
 /* Define to turn on (heavyweight) debugging machinery. */
 /* #define DEBUG_MALLOC */
 
+/*------------------------------------------------------------*/
+/*--- Command line options                                 ---*/
+/*------------------------------------------------------------*/
+
+/* Round malloc sizes upwards to integral number of words? default: NO */
+Bool VG_(clo_sloppy_malloc)  = False;
+
+/* DEBUG: print malloc details?  default: NO */
+Bool VG_(clo_trace_malloc)   = False;
+
+/* Minimum alignment in functions that don't specify alignment explicitly.
+   default: 4, the machine's natural word alignment */
+Int  VG_(clo_alignment) = 4;
+
+
+Bool VG_(replacement_malloc_process_cmd_line_option)(Char* arg)
+{
+   if      (VG_CLO_STREQN(12, arg, "--alignment=")) {
+      VG_(clo_alignment) = (Int)VG_(atoll)(&arg[12]);
+
+      if (VG_(clo_alignment) < 4 
+          || VG_(clo_alignment) > 4096
+          || VG_(log2)( VG_(clo_alignment) ) == -1 /* not a power of 2 */) {
+         VG_(message)(Vg_UserMsg, "");
+         VG_(message)(Vg_UserMsg, 
+            "Invalid --alignment= setting.  "
+            "Should be a power of 2, >= 4, <= 4096.");
+         VG_(bad_option)("--alignment");
+      }
+   }
+
+   else if (VG_CLO_STREQ(arg, "--sloppy-malloc=yes"))
+      VG_(clo_sloppy_malloc) = True;
+   else if (VG_CLO_STREQ(arg, "--sloppy-malloc=no"))
+      VG_(clo_sloppy_malloc) = False;
+
+   else if (VG_CLO_STREQ(arg, "--trace-malloc=yes"))
+      VG_(clo_trace_malloc) = True;
+   else if (VG_CLO_STREQ(arg, "--trace-malloc=no"))
+      VG_(clo_trace_malloc) = False;
+
+   else 
+      return False;
+
+   return True;
+}
+
+void VG_(replacement_malloc_print_usage)(void)
+{
+   VG_(printf)(
+"    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]\n"
+"    --alignment=<number>      set minimum alignment of allocations [4]\n"
+   );
+}
+
+void VG_(replacement_malloc_print_debug_usage)(void)
+{
+   VG_(printf)(
+"    --trace-malloc=no|yes     show client malloc details? [no]\n"
+   );
+}
+
 
 /*------------------------------------------------------------*/
 /*--- Structs n stuff                                      ---*/
@@ -66,9 +128,10 @@
 typedef 
    struct {
       Char*       name;
-      Int         rz_szW; /* Red zone size in words */
-      Bool        rz_check; /* Check red-zone on free? */
-      Int         min_sblockW; /* Minimum superblock size */
+      Bool	  clientmem;	/* allocates in the client address space */
+      Int         rz_szW;	/* Red zone size in words */
+      Bool        rz_check;	/* Check red-zone on free? */
+      Int         min_sblockW;	/* Minimum superblock size */
       WordF*      freelist[VG_N_MALLOC_LISTS];
       Superblock* sblocks;
       /* Stats only. */
@@ -143,11 +206,12 @@
 /* Initialise an arena. */
 static
 void arena_init ( Arena* a, Char* name, 
-                  Int rz_szW, Bool rz_check, Int min_sblockW )
+                  Int rz_szW, Bool rz_check, Int min_sblockW, Bool client )
 {
    Int i;
    vg_assert((min_sblockW % VKI_WORDS_PER_PAGE) == 0);
    a->name = name;
+   a->clientmem = client;
    a->rz_szW = rz_szW;
    a->rz_check = rz_check;
    a->min_sblockW = min_sblockW;
@@ -195,27 +259,27 @@
       here, which merely checks at the time of freeing that the red 
       zone words are unchanged. */
 
-   arena_init ( &vg_arena[VG_AR_CORE],      "core",     1, True, 262144 );
+   arena_init ( &vg_arena[VG_AR_CORE],      "core",     1, True, 262144, False );
 
-   arena_init ( &vg_arena[VG_AR_SKIN],      "skin",     1, True, 262144 );
+   arena_init ( &vg_arena[VG_AR_SKIN],      "skin",     1, True, 262144, False );
 
-   arena_init ( &vg_arena[VG_AR_SYMTAB],    "symtab",   1, True, 262144 );
+   arena_init ( &vg_arena[VG_AR_SYMTAB],    "symtab",   1, True, 262144, False );
 
-   arena_init ( &vg_arena[VG_AR_JITTER],    "JITter",   1, True, 8192 );
+   arena_init ( &vg_arena[VG_AR_JITTER],    "JITter",   1, True, 8192,   False );
 
    /* No particular reason for this figure, it's just smallish */
    sk_assert(VG_(vg_malloc_redzone_szB) < 128);
    arena_init ( &vg_arena[VG_AR_CLIENT],    "client",  
-                VG_(vg_malloc_redzone_szB)/4, False, 262144 );
+                VG_(vg_malloc_redzone_szB)/4, False, 262144, True );
 
    arena_init ( &vg_arena[VG_AR_DEMANGLE],  "demangle", 4 /*paranoid*/,
-                                                           True, 16384 );
+                                                           True, 16384, False );
 
-   arena_init ( &vg_arena[VG_AR_EXECTXT],   "exectxt",  1, True, 16384 );
+   arena_init ( &vg_arena[VG_AR_EXECTXT],   "exectxt",  1, True, 16384, False );
 
-   arena_init ( &vg_arena[VG_AR_ERRORS],    "errors",   1, True, 16384 );
+   arena_init ( &vg_arena[VG_AR_ERRORS],    "errors",   1, True, 16384, False );
 
-   arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien", 2, True, 16384 );
+   arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien", 2, True, 16384, False );
 
    init_done = True;
 #  ifdef DEBUG_MALLOC
@@ -257,8 +321,13 @@
    cszW += 2; /* Take into account sb->next and sb->n_words fields */
    if (cszW < a->min_sblockW) cszW = a->min_sblockW;
    while ((cszW % VKI_WORDS_PER_PAGE) > 0) cszW++;
-   sb = VG_(get_memory_from_mmap) ( cszW * sizeof(Word), 
-                                    "newSuperblock" );
+
+   if (a->clientmem) {
+      sb = (Superblock *)VG_(client_alloc)(0, cszW * sizeof(Word), 
+					   VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, 0);
+   } else
+      sb = VG_(get_memory_from_mmap) ( cszW * sizeof(Word), 
+				       "newSuperblock" );
    sb->n_payload_words = cszW - 2;
    a->bytes_mmaped += cszW * sizeof(Word);
    if (0)
diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c
index 549348e..ff6a040 100644
--- a/coregrind/vg_memory.c
+++ b/coregrind/vg_memory.c
@@ -32,180 +32,591 @@
 
 #include "vg_include.h"
 
+#include <stddef.h>
+
 /* Define to debug the memory-leak-detector. */
 /* #define VG_DEBUG_LEAKCHECK */
 
+static const Bool mem_debug = False;
+
+static Int addrcmp(const void *ap, const void *bp)
+{
+   Addr a = *(Addr *)ap;
+   Addr b = *(Addr *)bp;
+   Int ret;
+
+   if (a == b)
+      ret = 0;
+   else
+      ret = (a < b) ? -1 : 1;
+
+   return ret;
+}
+
+static Char *straddr(void *p)
+{
+   static Char buf[16];
+
+   VG_(sprintf)(buf, "%p", *(Addr *)p);
+
+   return buf;
+}
+
+static SkipList sk_segments = SKIPLIST_INIT(Segment, addr, addrcmp, straddr, VG_AR_CORE);
+
+/*--------------------------------------------------------------*/
+/*--- Maintain an ordered list of all the client's mappings  ---*/
+/*--------------------------------------------------------------*/
+
+Bool VG_(seg_contains)(const Segment *s, Addr p, UInt len)
+{
+   Addr se = s->addr+s->len;
+   Addr pe = p+len;
+
+   vg_assert(pe >= p);
+
+   return (p >= s->addr && pe <= se);
+}
+
+Bool VG_(seg_overlaps)(const Segment *s, Addr p, UInt len)
+{
+   Addr se = s->addr+s->len;
+   Addr pe = p+len;
+
+   vg_assert(pe >= p);
+
+   return (p < se && pe > s->addr);
+}
+
+/* Prepare a Segment structure for recycling by freeing everything
+   hanging off it. */
+static void recycleseg(Segment *s)
+{
+   if (s->flags & SF_CODE)
+      VG_(invalidate_translations)(s->addr, s->len, False);
+
+   if (s->filename != NULL)
+      VG_(arena_free)(VG_AR_CORE, (Char *)s->filename);
+
+   /* keep the SegInfo, if any - it probably still applies */
+}
+
+/* When freeing a Segment, also clean up every one else's ideas of
+   what was going on in that range of memory */
+static void freeseg(Segment *s)
+{
+   recycleseg(s);
+   if (s->symtab != NULL) {
+      VG_(symtab_decref)(s->symtab, s->addr, s->len);
+      s->symtab = NULL;
+   }
+
+   VG_(SkipNode_Free)(&sk_segments, s);
+}
+
+/* Split a segment at address a */
+static Segment *split_segment(Addr a)
+{
+   Segment *s = VG_(SkipList_Find)(&sk_segments, &a);
+   Segment *ns;
+   Int delta;
+
+   vg_assert((a & (VKI_BYTES_PER_PAGE-1)) == 0);
+
+   /* missed */
+   if (s == NULL)
+      return NULL;
+
+   /* a at or beyond endpoint */
+   if (s->addr == a || a >= (s->addr+s->len))
+      return NULL;
+
+   vg_assert(a > s->addr && a < (s->addr+s->len));
+
+   ns = VG_(SkipNode_Alloc)(&sk_segments);
+
+   *ns = *s;
+
+   delta = a - s->addr;
+   ns->addr += delta;
+   ns->offset += delta;
+   ns->len -= delta;
+
+   if (ns->symtab != NULL)
+      VG_(symtab_incref)(ns->symtab);
+
+   VG_(SkipList_Insert)(&sk_segments, ns);
+
+   return ns;
+}
+
+/* This unmaps all the segments in the range [addr, addr+len); any
+   partial mappings at the ends are truncated. */
+void VG_(unmap_range)(Addr addr, UInt len)
+{
+   Segment *s;
+   Segment *next;
+   static const Bool debug = False || mem_debug;
+
+   if (len == 0)
+      return;
+
+   if (debug)
+      VG_(printf)("unmap_range(%p, %d)\n", addr, len);
+
+   len = PGROUNDUP(addr+len)-PGROUNDDN(addr);
+   addr = PGROUNDDN(addr);
+
+   /* Everything must be page-aligned */
+   vg_assert((addr & (VKI_BYTES_PER_PAGE-1)) == 0);
+   vg_assert((len  & (VKI_BYTES_PER_PAGE-1)) == 0);
+
+   for(s = VG_(SkipList_Find)(&sk_segments, &addr); 
+       s != NULL && s->addr < (addr+len); 
+       s = next) {
+
+      /* fetch next now in case we end up deleting this segment */
+      next = VG_(SkipNode_Next)(&sk_segments, s);
+
+      if (debug)
+	 VG_(printf)("unmap: addr=%p s=%p ->addr=%p len=%d end=%p\n",
+		     addr, s, s->addr, s->len, s->addr+s->len);
+
+      if (!VG_(seg_overlaps)(s, addr, len))
+	 continue;
+
+      /* 4 cases: */
+      if (addr > s->addr && addr < (s->addr + s->len)) {
+	 /* this segment's tail is truncated by [addr, addr+len)
+	    -> truncate tail
+	 */
+	 s->len = addr - s->addr;
+
+	 if (debug)
+	    VG_(printf)("  case 1: s->len=%d\n", s->len);
+      } else if (addr <= s->addr && (addr+len) >= (s->addr + s->len)) {
+	 /* this segment is completely contained within [addr, addr+len)
+	    -> delete segment
+	 */
+	 Segment *rs = VG_(SkipList_Remove)(&sk_segments, &s->addr);
+	 vg_assert(rs == s);
+	 freeseg(s);
+
+	 if (debug)
+	    VG_(printf)("  case 2: s==%p deleted\n", s);
+      } else if ((addr+len) > s->addr && (addr+len) < (s->addr+s->len)) {
+	 /* this segment's head is truncated by [addr, addr+len)
+	    -> truncate head
+	 */
+	 Int delta = (addr+len) - s->addr;
+
+	 s->addr += delta;
+	 s->offset += delta;
+	 s->len -= delta;
+
+	 if (debug)
+	    VG_(printf)("  case 3: s->addr=%p s->len=%d delta=%d\n", s->addr, s->len, delta);
+      } else if (addr > s->addr && (addr+len) < (s->addr + s->len)) {
+	 /* [addr, addr+len) is contained within a single segment
+	    -> split segment into 3, delete middle portion
+	  */
+	 Segment *middle, *rs;
+
+	 middle = split_segment(addr);
+	 split_segment(addr+len);
+
+	 vg_assert(middle->addr == addr);
+	 rs = VG_(SkipList_Remove)(&sk_segments, &addr);
+	 vg_assert(rs == middle);
+
+	 freeseg(rs);
+
+	 if (debug)
+	    VG_(printf)("  case 4: subrange %p-%p deleted\n",
+			addr, addr+len);
+      }
+   }
+}
+
+/* Test whether segments s1 and s2 are adjacent and compatible, so
+   that they could be merged into a single segment */
+static inline Bool neighbours(Segment *s1, Segment *s2)
+{
+   if (s1->addr+s1->len != s2->addr)
+      return False;
+
+   if (s1->flags != s2->flags)
+      return False;
+
+   if (s1->prot != s2->prot)
+      return False;
+
+   if (s1->symtab != s2->symtab)
+      return False;
+
+   if (s1->flags & SF_FILE){
+      if ((s1->offset + s1->len) != s2->offset)
+	 return False;
+      if (s1->dev != s2->dev)
+	 return False;
+      if (s1->ino != s2->ino)
+	 return False;
+   }
+   
+   return True;
+}
+
+/* Merge segments in the address range if they're adjacent and
+   compatible */
+static void merge_segments(Addr a, UInt len)
+{
+   Segment *s;
+   Segment *next;
+
+   vg_assert((a & (VKI_BYTES_PER_PAGE-1)) == 0);
+   vg_assert((len & (VKI_BYTES_PER_PAGE-1)) == 0);
+
+   a -= VKI_BYTES_PER_PAGE;
+   len += VKI_BYTES_PER_PAGE;
+
+   for(s = VG_(SkipList_Find)(&sk_segments, &a);
+       s != NULL && s->addr < (a+len);) {
+      next = VG_(SkipNode_Next)(&sk_segments, s);
+
+      if (next && neighbours(s, next)) {
+	 Segment *rs;
+
+	 if (0)
+	    VG_(printf)("merge %p-%p with %p-%p\n",
+			s->addr, s->addr+s->len,
+			next->addr, next->addr+next->len);
+	 s->len += next->len;
+	 s = VG_(SkipNode_Next)(&sk_segments, next);
+
+	 rs = VG_(SkipList_Remove)(&sk_segments, &next->addr);
+	 vg_assert(next == rs);
+	 freeseg(next);
+      } else
+	 s = next;
+   }
+}
+
+void VG_(map_file_segment)(Addr addr, UInt len, UInt prot, UInt flags, 
+			   UInt dev, UInt ino, ULong off, const Char *filename)
+{
+   Segment *s;
+   static const Bool debug = False || mem_debug;
+   Bool recycled;
+
+   if (debug)
+      VG_(printf)("map_file_segment(%p, %d, %x, %x, %4x, %d, %ld, %s)\n",
+		  addr, len, prot, flags, dev, ino, off, filename);
+
+   /* Everything must be page-aligned */
+   vg_assert((addr & (VKI_BYTES_PER_PAGE-1)) == 0);
+   len = PGROUNDUP(len);
+
+   /* First look to see what already exists around here */
+   s = VG_(SkipList_Find)(&sk_segments, &addr);
+
+   if (s != NULL && s->addr == addr && s->len == len) {
+      /* This probably means we're just updating the flags */
+      recycled = True;
+      recycleseg(s);
+
+      /* If we had a symtab, but the new mapping is incompatible, then
+	 free up the old symtab in preparation for a new one. */
+      if (s->symtab != NULL		&&
+	  (!(s->flags & SF_FILE)	||
+	   !(flags & SF_FILE)		||
+	   s->dev != dev		||
+	   s->ino != ino		||
+	   s->offset != off)) {
+	 VG_(symtab_decref)(s->symtab, s->addr, s->len);
+	 s->symtab = NULL;
+      }
+   } else {
+      recycled = False;
+      VG_(unmap_range)(addr, len);
+
+      s = VG_(SkipNode_Alloc)(&sk_segments);
+
+      s->addr   = addr;
+      s->len    = len;
+      s->symtab = NULL;
+   }
+
+   s->flags  = flags;
+   s->prot   = prot;
+   s->dev    = dev;
+   s->ino    = ino;
+   s->offset = off;
+   
+   if (filename != NULL)
+      s->filename = VG_(arena_strdup)(VG_AR_CORE, filename);
+   else
+      s->filename = NULL;
+
+   if (debug) {
+      Segment *ts;
+      for(ts = VG_(SkipNode_First)(&sk_segments);
+	  ts != NULL;
+	  ts = VG_(SkipNode_Next)(&sk_segments, ts))
+	 VG_(printf)("list: %8p->%8p ->%d (0x%x) prot=%x flags=%x\n",
+		     ts, ts->addr, ts->len, ts->len, ts->prot, ts->flags);
+
+      VG_(printf)("inserting s=%p addr=%p len=%d\n",
+		  s, s->addr, s->len);
+   }
+
+   if (!recycled)
+      VG_(SkipList_Insert)(&sk_segments, s);
+
+   /* If this mapping is of the beginning of a file, isn't part of
+      Valgrind, is at least readable and seems to contain an object
+      file, then try reading symbols from it. */
+   if ((flags & (SF_MMAP|SF_NOSYMS)) == SF_MMAP	&&
+       s->symtab == NULL) {
+      if (off == 0									&&
+	  filename != NULL								&&
+	  (prot & (VKI_PROT_READ|VKI_PROT_EXEC)) == (VKI_PROT_READ|VKI_PROT_EXEC)	&&
+	  len >= VKI_BYTES_PER_PAGE							&&
+	  s->symtab == NULL								&&
+	  VG_(is_object_file)((void *)addr)) {
+
+      s->symtab = VG_(read_seg_symbols)(s);
+
+      if (s->symtab != NULL)
+	 s->flags |= SF_DYNLIB;
+      } else if (flags & SF_MMAP) {
+	 const SegInfo *info;
+
+	 /* Otherwise see if an existing symtab applies to this Segment */
+	 for(info = VG_(next_seginfo)(NULL);
+	     info != NULL;
+	     info = VG_(next_seginfo)(info)) {
+	    if (VG_(seg_overlaps)(s, VG_(seg_start)(info), VG_(seg_size)(info))) {
+	       s->symtab = (SegInfo *)info;
+	       VG_(symtab_incref)((SegInfo *)info);
+	    }
+	 }
+      }
+   }
+
+   /* clean up */
+   merge_segments(addr, len);
+}
+
+void VG_(map_fd_segment)(Addr addr, UInt len, UInt prot, UInt flags, 
+			 Int fd, ULong off, const Char *filename)
+{
+   struct vki_stat st;
+   Char *name = NULL;
+
+   st.st_dev = 0;
+   st.st_ino = 0;
+
+   if (fd != -1 && (flags & SF_FILE)) {
+      vg_assert((off & (VKI_BYTES_PER_PAGE-1)) == 0);
+
+      if (VG_(fstat)(fd, &st) < 0)
+	 flags &= ~SF_FILE;
+   }
+
+   if ((flags & SF_FILE) && filename == NULL && fd != -1)
+      name = VG_(resolve_filename)(fd);
+
+   if (filename == NULL)
+      filename = name;
+
+   VG_(map_file_segment)(addr, len, prot, flags, st.st_dev, st.st_ino, off, filename);
+
+   if (name)
+      VG_(arena_free)(VG_AR_CORE, name);
+}
+
+void VG_(map_segment)(Addr addr, UInt len, UInt prot, UInt flags)
+{
+   flags &= ~SF_FILE;
+
+   VG_(map_file_segment)(addr, len, prot, flags, 0, 0, 0, 0);
+}
+
+/* set new protection flags on an address range */
+void VG_(mprotect_range)(Addr a, UInt len, UInt prot)
+{
+   Segment *s, *next;
+   static const Bool debug = False || mem_debug;
+
+   if (debug)
+      VG_(printf)("mprotect_range(%p, %d, %x)\n", a, len, prot);
+
+   /* Everything must be page-aligned */
+   vg_assert((a & (VKI_BYTES_PER_PAGE-1)) == 0);
+   vg_assert((len & (VKI_BYTES_PER_PAGE-1)) == 0);
+
+   split_segment(a);
+   split_segment(a+len);
+
+   for(s = VG_(SkipList_Find)(&sk_segments, &a);
+       s != NULL && s->addr < a+len;
+       s = next)
+   {
+      next = VG_(SkipNode_Next)(&sk_segments, s);
+      if (s->addr < a)
+	 continue;
+
+      s->prot = prot;
+   }
+
+   merge_segments(a, len);
+}
+
+Addr VG_(find_map_space)(Addr addr, UInt len, Bool for_client)
+{
+   Segment *s;
+   Addr ret;
+   static const Bool debug = False || mem_debug;
+   Addr limit = (for_client ? VG_(client_end) : VG_(valgrind_mmap_end));
+
+   if (addr == 0)
+      addr = for_client ? VG_(client_mapbase) : VG_(valgrind_base);
+   else {
+      /* leave space for redzone and still try to get the exact
+	 address asked for */
+      addr -= VKI_BYTES_PER_PAGE;
+   }
+   ret = addr;
+
+   /* Everything must be page-aligned */
+   vg_assert((addr & (VKI_BYTES_PER_PAGE-1)) == 0);
+   len = PGROUNDUP(len);
+
+   len += VKI_BYTES_PER_PAGE * 2; /* leave redzone gaps before and after mapping */
+
+   if (debug)
+      VG_(printf)("find_map_space: ret starts as %p-%p client=%d\n",
+		  ret, ret+len, for_client);
+
+   for(s = VG_(SkipList_Find)(&sk_segments, &ret);
+       s != NULL && s->addr < (ret+len);
+       s = VG_(SkipNode_Next)(&sk_segments, s))
+   {
+      if (debug)
+	 VG_(printf)("s->addr=%p len=%d (%p) ret=%p\n",
+		     s->addr, s->len, s->addr+s->len, ret);
+
+      if (s->addr < (ret + len) && (s->addr + s->len) > ret)
+	 ret = s->addr+s->len;
+   }
+
+   if (debug) {
+      if (s)
+	 VG_(printf)("  s->addr=%p ->len=%d\n", s->addr, s->len);
+      else
+	 VG_(printf)("  s == NULL\n");
+   }
+
+   if ((limit - len) < ret)
+      ret = 0;			/* no space */
+   else
+      ret += VKI_BYTES_PER_PAGE; /* skip leading redzone */
+
+   if (debug)
+      VG_(printf)("find_map_space(%p, %d, %d) -> %p\n",
+		  addr, len, for_client, ret);
+   
+   return ret;
+}
+
+Segment *VG_(find_segment)(Addr a)
+{
+   return VG_(SkipList_Find)(&sk_segments, &a);
+}
+
+Segment *VG_(next_segment)(Segment *s)
+{
+   return VG_(SkipNode_Next)(&sk_segments, s);
+}
 
 /*--------------------------------------------------------------*/
 /*--- Initialise program data/text etc on program startup.   ---*/
 /*--------------------------------------------------------------*/
 
-typedef
-   struct _ExeSeg {
-      Addr start;
-      UInt size;
-      struct _ExeSeg* next;
-   }
-   ExeSeg;
-
-/* The list of current executable segments loaded.  Required so that when a
-   segment is munmap'd, if it's executable we can recognise it as such and
-   invalidate translations for it, and drop any basic-block specific
-   information being stored.  If symbols are being used, this list will have
-   the same segments recorded in it as the SegInfo symbols list (but much
-   less information about each segment).
-*/
-static ExeSeg* exeSegsHead = NULL;
-
-/* Prepend it -- mmaps/munmaps likely to follow a stack pattern(?) so this
-   is good.
-   Also check no segments overlap, which would be very bad.  Check is linear
-   for each seg added (quadratic overall) but the total number should be
-   small (konqueror has around 50 --njn). */
-static void add_exe_segment_to_list( Addr a, UInt len ) 
+static
+void build_valgrind_map_callback ( Addr start, UInt size, 
+				   Char rr, Char ww, Char xx, UInt dev, UInt ino,
+				   ULong foffset, const UChar* filename )
 {
-   Addr lo = a;
-   Addr hi = a + len - 1;
-   ExeSeg* es;
-   ExeSeg* es2;
-   
-   /* Prepend it */
-   es        = (ExeSeg*)VG_(arena_malloc)(VG_AR_CORE, sizeof(ExeSeg));
-   es->start = a;
-   es->size  = len;
-   es->next  = exeSegsHead;
-   exeSegsHead = es;
+   UInt prot = 0;
+   UInt flags;
+   Bool is_stack_segment;
+   Bool verbose = False || mem_debug; /* set to True for debugging */
 
-   /* Check there's no overlap with the rest of the list */
-   for (es2 = es->next; es2 != NULL; es2 = es2->next) {
-      Addr lo2 = es2->start;
-      Addr hi2 = es2->start + es2->size - 1;
-      Bool overlap;
-      vg_assert(lo < hi);
-      vg_assert(lo2 < hi2);
-      /* the main assertion */
-      overlap = (lo <= lo2 && lo2 <= hi)
-                 || (lo <= hi2 && hi2 <= hi);
-      if (overlap) {
-         VG_(printf)("\n\nOVERLAPPING EXE SEGMENTS\n"
-                     "  new: start %p, size %d\n"
-                     "  old: start %p, size %d\n\n",
-                     es->start, es->size, es2->start, es2->size );
-         vg_assert(! overlap);
-      }
+   is_stack_segment = (start == VG_(clstk_base) && (start+size) == VG_(clstk_end));
+
+   prot = 0;
+   flags = SF_MMAP|SF_NOSYMS;
+
+   if (start >= VG_(valgrind_base) && (start+size) <= VG_(valgrind_end))
+      flags |= SF_VALGRIND;
+
+   /* Only record valgrind mappings for now, without loading any
+      symbols.  This is so we know where the free space is before we
+      start allocating more memory (note: heap is OK, it's just mmap
+      which is the problem here). */
+   if (flags & SF_VALGRIND) {
+      if (verbose)
+	 VG_(printf)("adding segment %08p-%08p prot=%x flags=%4x filename=%s\n",
+		     start, start+size, prot, flags, filename);
+
+      VG_(map_file_segment)(start, size, prot, flags, dev, ino, foffset, filename);
    }
 }
 
-static Bool remove_if_exeseg_from_list( Addr a )
-{
-   ExeSeg **prev_next_ptr = & exeSegsHead, 
-          *curr = exeSegsHead;
-
-   while (True) {
-      if (curr == NULL) break;
-      if (a == curr->start) break;
-      prev_next_ptr = &curr->next;
-      curr = curr->next;
-   }
-   if (curr == NULL)
-      return False;
-
-   vg_assert(*prev_next_ptr == curr);
-
-   *prev_next_ptr = curr->next;
-
-   VG_(arena_free)(VG_AR_CORE, curr);
-   return True;
-}
-
-/* Records the exe segment in the ExeSeg list (checking for overlaps), and
-   reads debug info if required.  Note the entire /proc/pid/maps file is 
-   read for the debug info, but it just reads symbols for newly added exe
-   segments.  This is required to find out their names if they have one,
-   because with mmap() we only have the file descriptor, not the name.  We
-   don't use this at startup because we do have the names then. */
-void VG_(new_exeseg_mmap) ( Addr a, UInt len )
-{
-   add_exe_segment_to_list( a, len );
-   VG_(read_all_symbols)();
-}
-
-/* Like VG_(new_exeseg_mmap)(), but here we do have the name, so we don't
-   need to grovel through /proc/self/maps to find it. */
-void VG_(new_exeseg_startup) ( Addr a, UInt len, Char rr, Char ww, Char xx,
-                               UInt foffset, UChar* filename )
-{
-   add_exe_segment_to_list( a, len );
-   VG_(read_seg_symbols)( a, len, rr, ww, xx, foffset, filename);
-}
-
-/* Invalidate translations as necessary (also discarding any basic
-   block-specific info retained by the skin) and unload any debug
-   symbols. */
-// Nb: remove_if_exeseg_from_list() and VG_(maybe_unload_symbols)()
-// both ignore 'len', but that seems that's ok for most programs...  see
-// comment above vg_syscalls.c:mmap_segment() et al for more details.
-void VG_(remove_if_exeseg) ( Addr a, UInt len )
-{
-   if (remove_if_exeseg_from_list( a )) {
-      VG_(invalidate_translations) ( a, len, False );
-      VG_(unload_symbols)          ( a, len );
-   }
-}
-
-
 static
-void startup_segment_callback ( Addr start, UInt size, 
-                                Char rr, Char ww, Char xx, 
-                                UInt foffset, UChar* filename )
+void build_segment_map_callback ( Addr start, UInt size, 
+				  Char rr, Char ww, Char xx, UInt dev, UInt ino,
+				  ULong foffset, const UChar* filename )
 {
-   UInt r_esp;
+   UInt prot = 0;
+   UInt flags;
    Bool is_stack_segment;
-   Bool verbose = False; /* set to True for debugging */
+   Bool verbose = False || mem_debug; /* set to True for debugging */
+   Addr r_esp;
+
+   is_stack_segment = (start == VG_(clstk_base) && (start+size) == VG_(clstk_end));
+
+   if (rr == 'r')
+      prot |= VKI_PROT_READ;
+   if (ww == 'w')
+      prot |= VKI_PROT_WRITE;
+   if (xx == 'x')
+      prot |= VKI_PROT_EXEC;
+
+      
+   if (is_stack_segment)
+      flags = SF_STACK | SF_GROWDOWN;
+   else
+      flags = SF_EXEC|SF_MMAP;
+
+   if (filename != NULL)
+      flags |= SF_FILE;
+
+   if (start >= VG_(valgrind_base) && (start+size) <= VG_(valgrind_end))
+      flags |= SF_VALGRIND;
 
    if (verbose)
-      VG_(message)(Vg_DebugMsg,
-                   "initial map %8x-%8x %c%c%c? %8x (%d) (%s)",
-                   start,start+size,rr,ww,xx,foffset,
-                   size, filename?filename:(UChar*)"NULL");
+      VG_(printf)("adding segment %08p-%08p prot=%x flags=%4x filename=%s\n",
+		  start, start+size, prot, flags, filename);
 
-   if (rr != 'r' && xx != 'x' && ww != 'w') {
-      /* Implausible as it seems, R H 6.2 generates such segments:
-      40067000-400ac000 r-xp 00000000 08:05 320686 /usr/X11R6/lib/libXt.so.6.0
-      400ac000-400ad000 ---p 00045000 08:05 320686 /usr/X11R6/lib/libXt.so.6.0
-      400ad000-400b0000 rw-p 00045000 08:05 320686 /usr/X11R6/lib/libXt.so.6.0
-      when running xedit. So just ignore them. */
-      if (0)
-         VG_(printf)("No permissions on a segment mapped from %s\n", 
-                     filename?filename:(UChar*)"NULL");
-      return;
-   }
+   VG_(map_file_segment)(start, size, prot, flags, dev, ino, foffset, filename);
 
-   /* If this segment corresponds to something mmap'd /dev/zero by the
-      low-level memory manager (vg_malloc2.c), skip it.  Clients
-      should never have access to the segments which hold valgrind
-      internal data.  And access to client data in the VG_AR_CLIENT
-      arena is mediated by the skin, so we don't want make it
-      accessible at this stage. */
-   if (VG_(is_inside_segment_mmapd_by_low_level_MM)( start )) {
-      if (verbose)
-         VG_(message)(Vg_DebugMsg,
-                      "   skipping %8x-%8x (owned by our MM)", 
-                      start, start+size );
-      /* Don't announce it to the skin. */
-      return;
-   }
-   
-   /* This is similar to what happens when we mmap some new memory */
-   if (filename != NULL && xx == 'x') {
-      VG_(new_exeseg_startup)( start, size, rr, ww, xx, foffset, filename );
-   }
-
-   VG_TRACK( new_mem_startup, start, size, rr=='r', ww=='w', xx=='x' );
+   if (VG_(is_client_addr)(start) && VG_(is_client_addr)(start+size-1))
+      VG_TRACK( new_mem_startup, start, size, rr=='r', ww=='w', xx=='x' );
 
    /* If this is the stack segment mark all below %esp as noaccess. */
-   r_esp = VG_(baseBlock)[VGOFF_(m_esp)];
-   is_stack_segment = start <= r_esp && r_esp < start+size;
+   r_esp = VG_(m_state_static)[40/4];
    if (is_stack_segment) {
       if (0)
          VG_(message)(Vg_DebugMsg, "invalidating stack area: %x .. %x",
@@ -223,17 +634,17 @@
       buffer at the start of VG_(main) so that any superblocks mmap'd by
       calls to VG_(malloc)() by SK_({pre,post}_clo_init) aren't erroneously
       thought of as being owned by the client.
-
-   2. Sets up the end of the data segment so that vg_syscalls.c can make
-      sense of calls to brk().
  */
 void VG_(init_memory) ( void )
 {
    /* 1 */
-   VG_(parse_procselfmaps) ( startup_segment_callback );
+   /* reserve Valgrind's kickstart, heap and stack */
+   VG_(map_segment)(VG_(valgrind_mmap_end), VG_(valgrind_end)-VG_(valgrind_mmap_end),
+		    VKI_PROT_NONE, SF_VALGRIND|SF_FIXED);
 
-   /* 2 */
-   VG_(init_dataseg_end_for_brk)();
+   /* work out what's mapped where, and read interesting symtabs */
+   VG_(parse_procselfmaps) ( build_valgrind_map_callback );	/* just Valgrind mappings */
+   VG_(parse_procselfmaps) ( build_segment_map_callback );	/* everything */
 
    /* kludge: some newer kernels place a "sysinfo" page up high, with
       vsyscalls in it, and possibly some other stuff in the future. */
@@ -359,6 +770,134 @@
 }
 
 /*--------------------------------------------------------------------*/
+/*--- manage allocation of memory on behalf of the client          ---*/
+/*--------------------------------------------------------------------*/
+
+Addr VG_(client_alloc)(Addr addr, UInt len, UInt prot, UInt flags)
+{
+   len = PGROUNDUP(len);
+
+   if (!(flags & SF_FIXED))
+      addr = VG_(find_map_space)(addr, len, True);
+
+   flags |= SF_CORE;
+
+   if (VG_(mmap)((void *)addr, len, prot,
+		 VKI_MAP_FIXED | VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS | VKI_MAP_CLIENT,
+		 -1, 0) == (void *)addr) {
+      VG_(map_segment)(addr, len, prot, flags);
+      return addr;
+   }
+
+   return 0;
+}
+
+void VG_(client_free)(Addr addr)
+{
+   Segment *s = VG_(find_segment)(addr);
+
+   if (s == NULL || s->addr != addr || !(s->flags & SF_CORE)) {
+      VG_(message)(Vg_DebugMsg, "VG_(client_free)(%p) - no CORE memory found there", addr);
+      return;
+   }
+
+   VG_(munmap)((void *)s->addr, s->len);
+}
+
+Bool VG_(is_client_addr)(Addr a)
+{
+   return a >= VG_(client_base) && a < VG_(client_end);
+}
+
+Bool VG_(is_shadow_addr)(Addr a)
+{
+   return a >= VG_(shadow_base) && a < VG_(shadow_end);
+}
+
+Bool VG_(is_valgrind_addr)(Addr a)
+{
+   return a >= VG_(valgrind_base) && a < VG_(valgrind_end);
+}
+
+Addr VG_(get_client_base)(void)
+{
+   return VG_(client_base);
+}
+
+Addr VG_(get_client_end)(void)
+{
+   return VG_(client_end);
+}
+
+Addr VG_(get_client_size)(void)
+{
+   return VG_(client_end)-VG_(client_base);
+}
+
+Addr VG_(get_shadow_base)(void)
+{
+   return VG_(shadow_base);
+}
+
+Addr VG_(get_shadow_end)(void)
+{
+   return VG_(shadow_end);
+}
+
+Addr VG_(get_shadow_size)(void)
+{
+   return VG_(shadow_end)-VG_(shadow_base);
+}
+
+
+void VG_(init_shadow_range)(Addr p, UInt sz, Bool call_init)
+{
+   if (0)
+      VG_(printf)("init_shadow_range(%p, %d)\n", p, sz);
+
+   vg_assert(VG_(needs).shadow_memory);
+   vg_assert(VG_(defined_init_shadow_page)());
+
+   sz = PGROUNDUP(p+sz) - PGROUNDDN(p);
+   p = PGROUNDDN(p);
+
+   VG_(mprotect)((void *)p, sz, VKI_PROT_READ|VKI_PROT_WRITE);
+   
+   if (call_init) 
+      while(sz) {
+	 /* ask the skin to initialize each page */
+	 VG_TRACK( init_shadow_page, PGROUNDDN(p) );
+	 
+	 p += VKI_BYTES_PER_PAGE;
+	 sz -= VKI_BYTES_PER_PAGE;
+      }
+}
+
+void *VG_(shadow_alloc)(UInt size)
+{
+   static Addr shadow_alloc = 0;
+   void *ret;
+
+   vg_assert(VG_(needs).shadow_memory);
+   vg_assert(!VG_(defined_init_shadow_page)());
+
+   size = PGROUNDUP(size);
+
+   if (shadow_alloc == 0)
+      shadow_alloc = VG_(shadow_base);
+
+   if (shadow_alloc >= VG_(shadow_end))
+       return 0;
+
+   ret = (void *)shadow_alloc;
+   VG_(mprotect)(ret, size, VKI_PROT_READ|VKI_PROT_WRITE);
+
+   shadow_alloc += size;
+
+   return ret;
+}
+
+/*--------------------------------------------------------------------*/
 /*--- end                                              vg_memory.c ---*/
 /*--------------------------------------------------------------------*/
 
diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c
index 5931db9..dbf78eb 100644
--- a/coregrind/vg_mylibc.c
+++ b/coregrind/vg_mylibc.c
@@ -248,15 +248,57 @@
 void* VG_(mmap)( void* start, UInt length, 
                  UInt prot, UInt flags, UInt fd, UInt offset)
 {
-   Int  res;
+   Addr  res;
    UInt args[6];
+
+   if (!(flags & VKI_MAP_FIXED)) {
+      start = (void *)VG_(find_map_space)((Addr)start, length, !!(flags & VKI_MAP_CLIENT));
+      if (start == 0)
+	 return (void *)-1;
+
+      flags |= VKI_MAP_FIXED;
+   }
+
    args[0] = (UInt)start;
    args[1] = length;
    args[2] = prot;
-   args[3] = flags;
+   args[3] = flags & ~(VKI_MAP_NOSYMS|VKI_MAP_CLIENT);
    args[4] = fd;
    args[5] = offset;
    res = VG_(do_syscall)(__NR_mmap, (UInt)(&(args[0])) );
+
+   if (!VG_(is_kerror)(res)) {
+      UInt sf_flags = SF_MMAP;
+
+      if (flags & VKI_MAP_FIXED)
+	 sf_flags |= SF_FIXED;
+      if (flags & VKI_MAP_SHARED)
+	 sf_flags |= SF_SHARED;
+      if (!(flags & VKI_MAP_ANONYMOUS))
+	 sf_flags |= SF_FILE;
+      if (!(flags & VKI_MAP_CLIENT))
+	 sf_flags |= SF_VALGRIND;
+      if (flags & VKI_MAP_NOSYMS)
+	 sf_flags |= SF_NOSYMS;
+
+      /* placeholder - caller will update flags etc if they want */
+      VG_(map_fd_segment)(res, length, prot, sf_flags, fd, offset, NULL);
+
+      if (flags & VKI_MAP_CLIENT) {
+	 if (res < VG_(client_base) || res >= VG_(client_end)) {
+	    VG_(munmap)((void *)res, length);
+	    res = -1;
+	 }
+      } else {
+	 if (res < VG_(valgrind_base) || res >= VG_(valgrind_end)) {
+	    VG_(munmap)((void *)res, length);
+	    res = -1;
+	 }
+      }
+   }
+
+   
+
    return VG_(is_kerror)(res) ? ((void*)(-1)) : (void*)res;
 }
 
@@ -264,6 +306,16 @@
 Int VG_(munmap)( void* start, Int length )
 {
    Int res = VG_(do_syscall)(__NR_munmap, (UInt)start, (UInt)length );
+   if (!VG_(is_kerror)(res))
+      VG_(unmap_range)((Addr)start, length);
+   return VG_(is_kerror)(res) ? -1 : 0;
+}
+
+Int VG_(mprotect)( void *start, Int length, UInt prot )
+{
+   Int res = VG_(do_syscall)(__NR_mprotect, (UInt)start, (UInt)length, prot );
+   if (!VG_(is_kerror)(res))
+      VG_(mprotect_range)((Addr)start, length, prot);
    return VG_(is_kerror)(res) ? -1 : 0;
 }
 
@@ -999,7 +1051,7 @@
 /* Keep track of recursion depth. */
 static Int recDepth;
 
-static Bool string_match_wrk ( Char* pat, Char* str )
+static Bool string_match_wrk ( const Char* pat, const Char* str )
 {
    vg_assert(recDepth >= 0 && recDepth < 500);
    recDepth++;
@@ -1034,7 +1086,7 @@
    }
 }
 
-Bool VG_(string_match) ( Char* pat, Char* str )
+Bool VG_(string_match) ( const Char* pat, const Char* str )
 {
    Bool b;
    recDepth = 0;
@@ -1064,7 +1116,7 @@
    Addr stacktop;
 
    asm("movl %%ebp, %0; movl %%esp, %1" : "=r" (ebp), "=r" (esp));
-   stacktop = (Addr)&VG_(stack)[VG_STACK_SIZE_W];
+   stacktop = VG_(valgrind_end);
    if (esp >= (Addr)&VG_(sigstack)[0] && esp < (Addr)&VG_(sigstack)[VG_STACK_SIZE_W])
       stacktop = (Addr)&VG_(sigstack)[VG_STACK_SIZE_W];
       
@@ -1289,14 +1341,69 @@
    Misc functions looking for a proper home.
    ------------------------------------------------------------------ */
 
-/* We do getenv without libc's help by snooping around in
-   VG_(client_envp) as determined at startup time. */
-Char* VG_(getenv) ( Char* varname )
+void  VG_(env_unsetenv) ( Char **env, const Char *varname )
+{
+   Char **from;
+   Char **to = NULL;
+   Int len = VG_(strlen)(varname);
+
+   for(from = to = env; from && *from; from++) {
+      if (!(VG_(strncmp)(varname, *from, len) == 0 && (*from)[len] == '=')) {
+	 *to = *from;
+	 to++;
+      }
+   }
+   *to = *from;
+}
+
+/* set the environment; returns the old env if a new one was allocated */
+Char **VG_(env_setenv) ( Char ***envp, const Char* varname, const Char *val )
+{
+   Char **env = (*envp);
+   Char **cpp;
+   Int len = VG_(strlen)(varname);
+   Char *valstr = VG_(arena_malloc)(VG_AR_CORE, len + VG_(strlen)(val) + 2);
+   Char **oldenv = NULL;
+
+   VG_(sprintf)(valstr, "%s=%s", varname, val);
+
+   for(cpp = env; cpp && *cpp; cpp++) {
+      if (VG_(strncmp)(varname, *cpp, len) == 0 && (*cpp)[len] == '=') {
+	 *cpp = valstr;
+	 return oldenv;
+      }
+   }
+
+   if (env == NULL) {
+      env = VG_(arena_malloc)(VG_AR_CORE, sizeof(Char **) * 2);
+      env[0] = valstr;
+      env[1] = NULL;
+
+      *envp = env;
+
+   }  else {
+      Int envlen = (cpp-env) + 2;
+      Char **newenv = VG_(arena_malloc)(VG_AR_CORE, envlen * sizeof(Char **));
+
+      for(cpp = newenv; *env; )
+	 *cpp++ = *env++;
+      *cpp++ = valstr;
+      *cpp++ = NULL;
+
+      oldenv = *envp;
+
+      *envp = newenv;
+   }
+
+   return oldenv;
+}
+
+Char* VG_(env_getenv) ( Char **env, Char* varname )
 {
    Int i, n;
    n = VG_(strlen)(varname);
-   for (i = 0; VG_(client_envp)[i] != NULL; i++) {
-      Char* s = VG_(client_envp)[i];
+   for (i = 0; env[i] != NULL; i++) {
+      Char* s = env[i];
       if (VG_(strncmp)(varname, s, n) == 0 && s[n] == '=') {
          return & s[n+1];
       }
@@ -1304,6 +1411,12 @@
    return NULL;
 }
 
+/* We do getenv without libc's help by snooping around in
+   VG_(client_envp) as determined at startup time. */
+Char *VG_(getenv)(Char *varname)
+{
+   return VG_(env_getenv)(VG_(client_envp), varname);
+}
 
 /* Support for getrlimit. */
 Int VG_(getrlimit) (Int resource, struct vki_rlimit *rlim)
@@ -1532,10 +1645,27 @@
 {
    static UInt tot_alloc = 0;
    void* p;
-   p = VG_(mmap)( 0, nBytes,
-                     VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, 
-                     VKI_MAP_PRIVATE|VKI_MAP_ANONYMOUS, -1, 0 );
+
+#if 0
+   p = VG_(mmap)( (void *)VG_(valgrind_base), nBytes,
+		  VKI_PROT_READ | VKI_PROT_WRITE | VKI_PROT_EXEC, 
+		  VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS, -1, 0 );
+#else
+   /* use brk, because it will definitely be in the valgrind address space */
+   {
+      Char *b = VG_(brk)(0);
+
+      p = (void *)PGROUNDUP(b);
+      
+      b = VG_(brk)(p + PGROUNDUP(nBytes));
+
+      if (b != (p + PGROUNDUP(nBytes)))
+	 p = (void *)-1;
+   }
+#endif
+
    if (p != ((void*)(-1))) {
+      vg_assert(p >= (void *)VG_(valgrind_mmap_end) && p < (void *)VG_(valgrind_end));
       tot_alloc += (UInt)nBytes;
       if (0)
          VG_(printf)(
diff --git a/coregrind/vg_needs.c b/coregrind/vg_needs.c
index 551793d..00a0d14 100644
--- a/coregrind/vg_needs.c
+++ b/coregrind/vg_needs.c
@@ -58,61 +58,7 @@
    .syscall_wrapper      = False,
    .sanity_checks        = False,
    .data_syms	         = False,
-};
-
-VgTrackEvents VG_(track_events) = {
-   /* Memory events */
-   .new_mem_startup              = NULL,
-   .new_mem_stack_signal         = NULL,
-   .new_mem_brk                  = NULL,
-   .new_mem_mmap                 = NULL,
-
-   .copy_mem_remap               = NULL,
-   .change_mem_mprotect          = NULL,
-
-   .die_mem_stack_signal         = NULL,
-   .die_mem_brk                  = NULL,
-   .die_mem_munmap               = NULL,
-
-   .new_mem_stack_4              = NULL,
-   .new_mem_stack_8              = NULL,
-   .new_mem_stack_12             = NULL,
-   .new_mem_stack_16             = NULL,
-   .new_mem_stack_32             = NULL,
-   .new_mem_stack                = NULL,
-
-   .die_mem_stack_4              = NULL,
-   .die_mem_stack_8              = NULL,
-   .die_mem_stack_12             = NULL,
-   .die_mem_stack_16             = NULL,
-   .die_mem_stack_32             = NULL,
-   .die_mem_stack                = NULL,
-
-   .ban_mem_stack                = NULL,
-
-   .pre_mem_read                 = NULL,
-   .pre_mem_read_asciiz          = NULL,
-   .pre_mem_write                = NULL,
-   .post_mem_write               = NULL,
-
-   /* Register events */
-   .post_regs_write_init             = NULL,
-   .post_reg_write_syscall_return    = NULL,
-   .post_reg_write_deliver_signal    = NULL,
-   .post_reg_write_pthread_return    = NULL,
-   .post_reg_write_clientreq_return  = NULL,
-   .post_reg_write_clientcall_return = NULL,
-
-   /* Scheduler events */
-   .thread_run                   = NULL,
-
-   /* Mutex events */
-   .post_mutex_lock              = NULL,
-   .post_mutex_unlock            = NULL,
-
-   /* Signal events */
-   .pre_deliver_signal           = NULL,
-   .post_deliver_signal          = NULL,
+   .shadow_memory        = False,
 };
 
 /* static */
@@ -132,35 +78,35 @@
    CHECK_NOT(VG_(details).copyright_author, NULL);
    CHECK_NOT(VG_(details).bug_reports_to,   NULL);
 
-   if ( (VG_(track_events).new_mem_stack_4  ||
-         VG_(track_events).new_mem_stack_8  ||
-         VG_(track_events).new_mem_stack_12 ||
-         VG_(track_events).new_mem_stack_16 ||
-         VG_(track_events).new_mem_stack_32) &&
-       ! VG_(track_events).new_mem_stack) 
+   if ( (VG_(defined_new_mem_stack_4)()  ||
+         VG_(defined_new_mem_stack_8)()  ||
+         VG_(defined_new_mem_stack_12)() ||
+         VG_(defined_new_mem_stack_16)() ||
+         VG_(defined_new_mem_stack_32)()) &&
+       ! VG_(defined_new_mem_stack)()) 
    {
       VG_(printf)("\nTool error: one of the specialised `new_mem_stack_n'\n"
                   "events tracked, but not the generic `new_mem_stack' one.\n");
       VG_(skin_panic)("`new_mem_stack' should be defined\n");
    }
 
-   if ( (VG_(track_events).die_mem_stack_4  ||
-         VG_(track_events).die_mem_stack_8  ||
-         VG_(track_events).die_mem_stack_12 ||
-         VG_(track_events).die_mem_stack_16 ||
-         VG_(track_events).die_mem_stack_32) &&
-       ! VG_(track_events).die_mem_stack) 
+   if ( (VG_(defined_die_mem_stack_4)()  ||
+         VG_(defined_die_mem_stack_8)()  ||
+         VG_(defined_die_mem_stack_12)() ||
+         VG_(defined_die_mem_stack_16)() ||
+         VG_(defined_die_mem_stack_32)()) &&
+       ! VG_(defined_die_mem_stack)()) 
    {
       VG_(printf)("\nTool error: one of the specialised `die_mem_stack_n'\n"
                   "events tracked, but not the generic `die_mem_stack' one.\n");
       VG_(skin_panic)("`die_mem_stack' should be defined\n");
    }
 
-   if ( (VG_(track_events).post_reg_write_syscall_return    ||
-         VG_(track_events).post_reg_write_deliver_signal    ||
-         VG_(track_events).post_reg_write_pthread_return    ||
-         VG_(track_events).post_reg_write_clientreq_return  ||
-         VG_(track_events).post_reg_write_clientcall_return) &&
+   if ( (VG_(defined_post_reg_write_syscall_return)()    ||
+         VG_(defined_post_reg_write_deliver_signal)()    ||
+         VG_(defined_post_reg_write_pthread_return)()    ||
+         VG_(defined_post_reg_write_clientreq_return)()  ||
+         VG_(defined_post_reg_write_clientcall_return)()) &&
        ! VG_(needs).shadow_regs) 
    {
       VG_(printf)("\nTool error: one of the `post_reg_write'\n"
@@ -168,6 +114,16 @@
       VG_(skin_panic)("`shadow_regs' should be set\n");
    }
 
+   if (VG_(needs).shadow_memory != (VG_(get_shadow_size)() != 0)) {
+      if (VG_(get_shadow_size)() != 0)
+	 VG_(printf)("\nTool error: tool allocated shadow memory, but apparently doesn't "
+		     "need it.\n");
+      else
+	 VG_(printf)("\nTool error: tool didn't allocate shadow memory, but apparently "
+		     "needs it.\n");
+      VG_(skin_panic)("VG_(needs).shadow_memory need should be set to match SK_(shadow_ratio)\n");
+   }
+
 #undef CHECK_NOT
 #undef INVALID_Bool
 }
@@ -210,68 +166,7 @@
 NEEDS(syscall_wrapper)
 NEEDS(sanity_checks)
 NEEDS(data_syms)
-
-/*--------------------------------------------------------------------*/
-#define TRACK(event, args...)  \
-   void VG_(track_##event)(void (*f)(args)) \
-   {                                      \
-      VG_(track_events).event = f;        \
-   }
-
-/* Memory events */
-TRACK(new_mem_startup,       Addr a, UInt len, Bool rr, Bool ww, Bool xx)
-TRACK(new_mem_stack_signal,  Addr a, UInt len)
-TRACK(new_mem_brk,           Addr a, UInt len)
-TRACK(new_mem_mmap,          Addr a, UInt len, Bool rr, Bool ww, Bool xx)
-
-TRACK(copy_mem_remap,      Addr from, Addr to, UInt len)
-TRACK(change_mem_mprotect, Addr a, UInt len, Bool rr, Bool ww, Bool xx)
-
-TRACK(die_mem_stack_signal,  Addr a, UInt len)
-TRACK(die_mem_brk,           Addr a, UInt len)
-TRACK(die_mem_munmap,        Addr a, UInt len)
-
-TRACK(new_mem_stack_4,       Addr new_ESP)
-TRACK(new_mem_stack_8,       Addr new_ESP)
-TRACK(new_mem_stack_12,      Addr new_ESP)
-TRACK(new_mem_stack_16,      Addr new_ESP)
-TRACK(new_mem_stack_32,      Addr new_ESP)
-TRACK(new_mem_stack,         Addr a, UInt len)
-
-TRACK(die_mem_stack_4,       Addr new_ESP)
-TRACK(die_mem_stack_8,       Addr new_ESP)
-TRACK(die_mem_stack_12,      Addr new_ESP)
-TRACK(die_mem_stack_16,      Addr new_ESP)
-TRACK(die_mem_stack_32,      Addr new_ESP)
-TRACK(die_mem_stack,         Addr a, UInt len)
-
-TRACK(ban_mem_stack, Addr a, UInt len)
-
-TRACK(pre_mem_read,        CorePart part, ThreadId tid, Char* s, Addr a,
-                           UInt size)
-TRACK(pre_mem_read_asciiz, CorePart part, ThreadId tid, Char* s, Addr a)
-TRACK(pre_mem_write,       CorePart part, ThreadId tid, Char* s, Addr a,
-                           UInt size)
-TRACK(post_mem_write,      Addr a, UInt size)
-
-TRACK(post_regs_write_init,             void );
-TRACK(post_reg_write_syscall_return,    ThreadId tid, UInt reg );
-TRACK(post_reg_write_deliver_signal,    ThreadId tid, UInt reg );
-TRACK(post_reg_write_pthread_return,    ThreadId tid, UInt reg );
-TRACK(post_reg_write_clientreq_return,  ThreadId tid, UInt reg );
-TRACK(post_reg_write_clientcall_return, ThreadId tid, UInt reg, Addr f );
-
-TRACK(thread_run, ThreadId tid)
-
-TRACK(post_thread_create, ThreadId tid, ThreadId child)
-TRACK(post_thread_join,   ThreadId joiner, ThreadId joinee)
-
-TRACK( pre_mutex_lock,   ThreadId tid, void* /*pthread_mutex_t* */ mutex)
-TRACK(post_mutex_lock,   ThreadId tid, void* /*pthread_mutex_t* */ mutex)
-TRACK(post_mutex_unlock, ThreadId tid, void* /*pthread_mutex_t* */ mutex)
-
-TRACK( pre_deliver_signal, ThreadId tid, Int sigNum, Bool alt_stack)
-TRACK(post_deliver_signal, ThreadId tid, Int sigNum)
+NEEDS(shadow_memory)
 
 /*--------------------------------------------------------------------*/
 /* UCodeBlocks */
diff --git a/coregrind/vg_procselfmaps.c b/coregrind/vg_procselfmaps.c
index 33186f1..2788eac 100644
--- a/coregrind/vg_procselfmaps.c
+++ b/coregrind/vg_procselfmaps.c
@@ -50,6 +50,12 @@
    return -1;
 }
 
+static Int decdigit ( Char c )
+{
+   if (c >= '0' && c <= '9') return (Int)(c - '0');
+   return -1;
+}
+
 static Int readchar ( Char* buf, Char* ch )
 {
    if (*buf == 0) return 0;
@@ -68,6 +74,17 @@
    return n;
 }
 
+static Int readdec ( Char* buf, UInt* val )
+{
+   Int n = 0;
+   *val = 0;
+   while (decdigit(*buf) >= 0) {
+      *val = (*val * 10) + decdigit(*buf);
+      n++; buf++;
+   }
+   return n;
+}
+
 
 /* Read /proc/self/maps, store the contents in a static buffer.  If there's
    a syntax error or other failure, just abort. */
@@ -124,13 +141,16 @@
        procmap_buf!
 */
 void VG_(parse_procselfmaps) (
-   void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) )
+   void (*record_mapping)( Addr addr, UInt len, Char rr, Char ww, Char xx, 
+			   UInt dev, UInt ino, ULong foff, const UChar* filename )
+   )
 {
    Int    i, j, i_eol;
    Addr   start, endPlusOne;
    UChar* filename;
    UInt   foffset;
    UChar  rr, ww, xx, pp, ch, tmp;
+   UInt	  maj, min, ino;
 
    sk_assert( '\0' != procmap_buf[0] && 0 != buf_n_tot);
 
@@ -142,7 +162,7 @@
    while (True) {
       if (i >= buf_n_tot) break;
 
-      /* Read (without fscanf :) the pattern %8x-%8x %c%c%c%c %8x */
+      /* Read (without fscanf :) the pattern %8x-%8x %c%c%c%c %8x %2x:%2x %d */
       j = readhex(&procmap_buf[i], &start);
       if (j > 0) i += j; else goto syntaxerror;
       j = readchar(&procmap_buf[i], &ch);
@@ -159,7 +179,7 @@
       if (j == 1 && (ww == 'w' || ww == '-')) i += j; else goto syntaxerror;
       j = readchar(&procmap_buf[i], &xx);
       if (j == 1 && (xx == 'x' || xx == '-')) i += j; else goto syntaxerror;
-      /* I haven't a clue what this last field means. */
+      /* This field is the shared/private flag */
       j = readchar(&procmap_buf[i], &pp);
       if (j == 1 && (pp == 'p' || pp == '-' || pp == 's')) 
                                               i += j; else goto syntaxerror;
@@ -169,7 +189,23 @@
 
       j = readhex(&procmap_buf[i], &foffset);
       if (j > 0) i += j; else goto syntaxerror;
-      
+
+      j = readchar(&procmap_buf[i], &ch);
+      if (j == 1 && ch == ' ') i += j; else goto syntaxerror;
+
+      j = readhex(&procmap_buf[i], &maj);
+      if (j > 0) i += j; else goto syntaxerror;
+      j = readchar(&procmap_buf[i], &ch);
+      if (j == 1 && ch == ':') i += j; else goto syntaxerror;
+      j = readhex(&procmap_buf[i], &min);
+      if (j > 0) i += j; else goto syntaxerror;
+
+      j = readchar(&procmap_buf[i], &ch);
+      if (j == 1 && ch == ' ') i += j; else goto syntaxerror;
+
+      j = readdec(&procmap_buf[i], &ino);
+      if (j > 0) i += j; else goto syntaxerror;
+ 
       goto read_line_ok;
 
     syntaxerror:
@@ -203,7 +239,7 @@
       }
 
       (*record_mapping) ( start, endPlusOne-start, 
-                          rr, ww, xx, 
+                          rr, ww, xx, maj * 256 + min, ino,
                           foffset, filename );
 
       if ('\0' != tmp) {
diff --git a/coregrind/vg_proxylwp.c b/coregrind/vg_proxylwp.c
index 8c4790e..17d599a 100644
--- a/coregrind/vg_proxylwp.c
+++ b/coregrind/vg_proxylwp.c
@@ -257,7 +257,6 @@
 }
 
 #define PROXYLWP_OFFSET	(VKI_BYTES_PER_PAGE - sizeof(ProxyLWP))
-#define ROUNDDN(p)	((UChar *)((Addr)(p) & ~(VKI_BYTES_PER_PAGE-1)))
 
 /* 
    Allocate a page for the ProxyLWP and its stack.
@@ -271,7 +270,7 @@
 {
    UChar *p = VG_(get_memory_from_mmap)(VKI_BYTES_PER_PAGE, "alloc_LWP");
    ProxyLWP *ret;
-   vg_assert(p == ROUNDDN(p)); /* px must be page aligned */
+   vg_assert(p == (UChar *)PGROUNDDN(p)); /* px must be page aligned */
 
    ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
 
@@ -283,7 +282,7 @@
 /* Free a thread structure */
 static void LWP_free(ProxyLWP *px)
 {
-   UChar *p = ROUNDDN(px);
+   UChar *p = (UChar *)PGROUNDDN(px);
    
    vg_assert(px->magic == VG_PROXY_MAGIC);
    px->magic = 0;
@@ -297,7 +296,7 @@
    end). */
 static inline ProxyLWP *LWP_TSD(void *esp)
 {
-   UChar *p = ROUNDDN(esp);
+   UChar *p = (UChar *)PGROUNDDN(esp);
    ProxyLWP *ret;
 
    ret = (ProxyLWP *)(p + PROXYLWP_OFFSET);
diff --git a/coregrind/vg_replace_malloc.c b/coregrind/vg_replace_malloc.c
index 3a79a21..459db67 100644
--- a/coregrind/vg_replace_malloc.c
+++ b/coregrind/vg_replace_malloc.c
@@ -43,235 +43,165 @@
 
 #include "valgrind.h"            /* for VALGRIND_NON_SIMD_CALL[12] */
 #include "vg_include.h"
+#include "vg_skin.h"
 
-/*------------------------------------------------------------*/
-/*--- Command line options                                 ---*/
-/*------------------------------------------------------------*/
+/* Create an alias */
+#define ALIAS(ret, name, args, toname)					\
+   ret name args __attribute__((alias(#toname), visibility("protected")))
 
-/* Round malloc sizes upwards to integral number of words? default: NO */
-Bool VG_(clo_sloppy_malloc)  = False;
-
-/* DEBUG: print malloc details?  default: NO */
-Bool VG_(clo_trace_malloc)   = False;
-
-/* Minimum alignment in functions that don't specify alignment explicitly.
-   default: 0, i.e. use default of the machine (== 4) */
-Int  VG_(clo_alignment) = 4;
-
-
-Bool VG_(replacement_malloc_process_cmd_line_option)(Char* arg)
-{
-   if      (VG_CLO_STREQN(12, arg, "--alignment=")) {
-      VG_(clo_alignment) = (Int)VG_(atoll)(&arg[12]);
-
-      if (VG_(clo_alignment) < 4 
-          || VG_(clo_alignment) > 4096
-          || VG_(log2)( VG_(clo_alignment) ) == -1 /* not a power of 2 */) {
-         VG_(message)(Vg_UserMsg, "");
-         VG_(message)(Vg_UserMsg, 
-            "Invalid --alignment= setting.  "
-            "Should be a power of 2, >= 4, <= 4096.");
-         VG_(bad_option)("--alignment");
-      }
-   }
-
-   else if (VG_CLO_STREQ(arg, "--sloppy-malloc=yes"))
-      VG_(clo_sloppy_malloc) = True;
-   else if (VG_CLO_STREQ(arg, "--sloppy-malloc=no"))
-      VG_(clo_sloppy_malloc) = False;
-
-   else if (VG_CLO_STREQ(arg, "--trace-malloc=yes"))
-      VG_(clo_trace_malloc) = True;
-   else if (VG_CLO_STREQ(arg, "--trace-malloc=no"))
-      VG_(clo_trace_malloc) = False;
-
-   else 
-      return False;
-
-   return True;
-}
-
-void VG_(replacement_malloc_print_usage)(void)
-{
-   VG_(printf)(
-"    --sloppy-malloc=no|yes    round malloc sizes to next word? [no]\n"
-"    --alignment=<number>      set minimum alignment of allocations [4]\n"
-   );
-}
-
-void VG_(replacement_malloc_print_debug_usage)(void)
-{
-   VG_(printf)(
-"    --trace-malloc=no|yes     show client malloc details? [no]\n"
-   );
-}
-
+/* Declare a function, along with libc's various aliases */
+#define LIBALIAS(ret, name, args)		\
+	ALIAS(ret, __##name, args, name);	\
+	ALIAS(ret, __libc_##name, args, name);	\
+	ret name args
 
 /*------------------------------------------------------------*/
 /*--- Replacing malloc() et al                             ---*/
 /*------------------------------------------------------------*/
 
+static struct vg_mallocfunc_info info;
+static int init_done;
+
+/* Startup hook - called as init section */
+static void init(void) __attribute__((constructor));
+
 /* Below are new versions of malloc, __builtin_new, free, 
    __builtin_delete, calloc, realloc, memalign, and friends.
 
-   malloc, __builtin_new, free, __builtin_delete, calloc and realloc
-   can be entered either on the real CPU or the simulated one.  If on
-   the real one, this is because the dynamic linker is running the
-   static initialisers for C++, before starting up Valgrind itself.
-   In this case it is safe to route calls through to
-   VG_(arena_malloc)/VG_(arena_free), since they are self-initialising.
-
-   Once Valgrind is initialised, vg_running_on_simd_CPU becomes True.
-   The call needs to be transferred from the simulated CPU back to the
-   real one and routed to the VG_(cli_malloc)() or VG_(cli_free)().  To do
-   that, the client-request mechanism (in valgrind.h) is used to convey
-   requests to the scheduler.
+   None of these functions are called directly - they are not meant to
+   be found by the dynamic linker.  They get called because
+   vg_replace_malloc installs a bunch of code redirects which causes
+   Valgrind to use these functions rather than the ones they're
+   replacing.  That said, we certainly don't mind if the linker finds
+   them, because it makes our life easier with respect to startup
+   initialization order (we can't guarantee that our init routine will
+   necessarily be called early enough to do the redirects before
+   someone wants to allocate).
 */
 
 #define MALLOC_TRACE(format, args...)  \
-   if (VG_(clo_trace_malloc))          \
+   if (info.clo_trace_malloc)          \
       VALGRIND_INTERNAL_PRINTF(format, ## args )
 
 #define MAYBE_SLOPPIFY(n)           \
-   if (VG_(clo_sloppy_malloc)) {    \
-      while ((n % 4) > 0) n++;      \
+   if (info.clo_sloppy_malloc) {    \
+      n = (n+3) & ~3;		    \
    }
 
 /* ALL calls to malloc() and friends wind up here. */
 #define ALLOC(fff, vgfff) \
-void* fff ( Int n ) \
+LIBALIAS(void *, fff, (Int n))			\
 { \
    void* v; \
  \
-   MALLOC_TRACE(#fff "[simd=%d](%d)",  \
-                (UInt)VG_(is_running_on_simd_CPU)(), n ); \
+   MALLOC_TRACE(#fff "(%d)", n ); \
    MAYBE_SLOPPIFY(n); \
+   if (!init_done) init(); \
  \
-   if (VG_(is_running_on_simd_CPU)()) { \
-      v = (void*)VALGRIND_NON_SIMD_CALL1( vgfff, n ); \
-   } else if (VG_(clo_alignment) != 4) { \
-      v = VG_(arena_malloc_aligned)(VG_AR_CLIENT, VG_(clo_alignment), n); \
-   } else { \
-      v = VG_(arena_malloc)(VG_AR_CLIENT, n); \
-   } \
+   v = (void*)VALGRIND_NON_SIMD_CALL1( info.sk_##vgfff, n ); \
    MALLOC_TRACE(" = %p", v ); \
    return v; \
 }
-ALLOC( malloc,              SK_(malloc)            );
-ALLOC( __builtin_new,       SK_(__builtin_new)     );
-ALLOC( _Znwj,               SK_(__builtin_new)     );
+ALLOC( malloc,              malloc            );
+ALLOC( __builtin_new,       __builtin_new     );
+ALLOC( _Znwj,               __builtin_new     );
 
 // operator new(unsigned, std::nothrow_t const&)
-ALLOC( _ZnwjRKSt9nothrow_t, SK_(__builtin_new)     );
+ALLOC( _ZnwjRKSt9nothrow_t, __builtin_new     );
 
-ALLOC( __builtin_vec_new,   SK_(__builtin_vec_new) );
-ALLOC( _Znaj,               SK_(__builtin_vec_new) );
+ALLOC( __builtin_vec_new,   __builtin_vec_new );
+ALLOC( _Znaj,               __builtin_vec_new );
 
 // operator new[](unsigned, std::nothrow_t const&
-ALLOC( _ZnajRKSt9nothrow_t, SK_(__builtin_vec_new) );
+ALLOC( _ZnajRKSt9nothrow_t, __builtin_vec_new );
 
 #define FREE(fff, vgfff) \
-void fff ( void* p ) \
+LIBALIAS(void, fff, (void *p))			\
 { \
-   MALLOC_TRACE(#fff "[simd=%d](%p)",  \
-                (UInt)VG_(is_running_on_simd_CPU)(), p ); \
+   MALLOC_TRACE(#fff "(%p)", p ); \
    if (p == NULL)  \
       return; \
-   if (VG_(is_running_on_simd_CPU)()) { \
-      (void)VALGRIND_NON_SIMD_CALL1( vgfff, p ); \
-   } else { \
-      VG_(arena_free)(VG_AR_CLIENT, p);       \
-   } \
+   if (!init_done) init(); \
+   (void)VALGRIND_NON_SIMD_CALL1( info.sk_##vgfff, p ); \
 }
-FREE( free,                 SK_(free)                 );
-FREE( __builtin_delete,     SK_(__builtin_delete)     );
-FREE( _ZdlPv,               SK_(__builtin_delete)     );
-FREE( __builtin_vec_delete, SK_(__builtin_vec_delete) );
-FREE( _ZdaPv,               SK_(__builtin_vec_delete) );
+FREE( free,                 free                 );
+FREE( __builtin_delete,     __builtin_delete     );
+FREE( _ZdlPv,               __builtin_delete     );
+FREE( __builtin_vec_delete, __builtin_vec_delete );
+FREE( _ZdaPv,               __builtin_vec_delete );
 
-void* calloc ( UInt nmemb, UInt size )
+LIBALIAS(void*, calloc, ( Int nmemb, Int size ))
 {
    void* v;
 
-   MALLOC_TRACE("calloc[simd=%d](%d,%d)", 
-                (UInt)VG_(is_running_on_simd_CPU)(), nmemb, size );
+   MALLOC_TRACE("calloc(%d,%d)", nmemb, size );
    MAYBE_SLOPPIFY(size);
 
-   if (VG_(is_running_on_simd_CPU)()) {
-      v = (void*)VALGRIND_NON_SIMD_CALL2( SK_(calloc), nmemb, size );
-   } else {
-      v = VG_(arena_calloc)(VG_AR_CLIENT, VG_(clo_alignment), nmemb, size);
-   }
+   if (!init_done) init();
+   v = (void*)VALGRIND_NON_SIMD_CALL2( info.sk_calloc, nmemb, size );
    MALLOC_TRACE(" = %p", v );
    return v;
 }
 
-
-void* realloc ( void* ptrV, Int new_size )
+LIBALIAS(void*, realloc, ( void* ptrV, Int new_size ))
 {
    void* v;
 
-   MALLOC_TRACE("realloc[simd=%d](%p,%d)", 
-                (UInt)VG_(is_running_on_simd_CPU)(), ptrV, new_size );
+   MALLOC_TRACE("realloc(%p,%d)", ptrV, new_size );
    MAYBE_SLOPPIFY(new_size);
 
    if (ptrV == NULL)
       return malloc(new_size);
    if (new_size <= 0) {
       free(ptrV);
-      if (VG_(clo_trace_malloc)) 
-         VG_(printf)(" = 0" );
+      if (info.clo_trace_malloc) 
+         VALGRIND_INTERNAL_PRINTF(" = 0" );
       return NULL;
    }   
-   if (VG_(is_running_on_simd_CPU)()) {
-      v = (void*)VALGRIND_NON_SIMD_CALL2( SK_(realloc), ptrV, new_size );
-   } else {
-      v = VG_(arena_realloc)(VG_AR_CLIENT, ptrV, VG_(clo_alignment), new_size);
-   }
+   if (!init_done) init();
+   v = (void*)VALGRIND_NON_SIMD_CALL2( info.sk_realloc, ptrV, new_size );
    MALLOC_TRACE(" = %p", v );
    return v;
 }
 
 
-void* memalign ( Int alignment, Int n )
+LIBALIAS(void*, memalign, ( Int alignment, Int n ))
 {
    void* v;
 
-   MALLOC_TRACE("memalign[simd=%d](al %d, size %d)", 
-                (UInt)VG_(is_running_on_simd_CPU)(), alignment, n );
+   MALLOC_TRACE("memalign(al %d, size %d)", alignment, n );
    MAYBE_SLOPPIFY(n);
 
-   if (VG_(is_running_on_simd_CPU)()) {
-      v = (void*)VALGRIND_NON_SIMD_CALL2( SK_(memalign), alignment, n );
-   } else {
-      v = VG_(arena_malloc_aligned)(VG_AR_CLIENT, alignment, n);
-   }
+   if (!init_done) init();
+   v = (void*)VALGRIND_NON_SIMD_CALL2( info.sk_memalign, alignment, n );
    MALLOC_TRACE(" = %p", v );
    return v;
 }
 
 
-void* valloc ( Int size )
+LIBALIAS(void*, valloc, ( Int size ))
 {
    return memalign(VKI_BYTES_PER_PAGE, size);
 }
 
 
 /* Various compatibility wrapper functions, for glibc and libstdc++. */
-void cfree ( void* p )
+
+/* Don't just alias free, otherwise people could get confused seeing
+   cfree rather than free in error output */
+LIBALIAS(void, cfree, ( void* p ) )
 {
-   free ( p );
+   free(p);
 }
 
-
-int mallopt ( int cmd, int value )
+LIBALIAS(int, mallopt, ( int cmd, int value ))
 {
    /* In glibc-2.2.4, 1 denotes a successful return value for mallopt */
    return 1;
 }
 
 
-int __posix_memalign ( void **memptr, UInt alignment, UInt size )
+LIBALIAS(int, posix_memalign, ( void **memptr, UInt alignment, UInt size ))
 {
     void *mem;
 
@@ -290,25 +220,17 @@
     return VKI_ENOMEM /*12*/ /*ENOMEM*/;
 }
 
-# define weak_alias(name, aliasname) \
-  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
-weak_alias(__posix_memalign, posix_memalign);
-
-Int malloc_usable_size ( void* p )
+LIBALIAS(int, malloc_usable_size, ( void* p ))
 { 
    Int pszB;
    
-   MALLOC_TRACE("malloc_usable_size[simd=%d](%p)", 
-                (UInt)VG_(is_running_on_simd_CPU)(), p );
+   MALLOC_TRACE("malloc_usable_size(%p)", p );
    if (NULL == p)
       return 0;
 
-   if (VG_(is_running_on_simd_CPU)()) {
-      pszB = (Int)VALGRIND_NON_SIMD_CALL2( VG_(arena_payload_szB), 
-                                           VG_AR_CLIENT, p );
-   } else {
-      pszB = VG_(arena_payload_szB)(VG_AR_CLIENT, p);
-   }
+   if (!init_done) init();
+   pszB = (Int)VALGRIND_NON_SIMD_CALL2( info.arena_payload_szB, 
+					VG_AR_CLIENT, p );
    MALLOC_TRACE(" = %d", pszB );
 
    return pszB;
@@ -316,21 +238,28 @@
 
 
 /* Bomb out if we get any of these. */
-/* HACK: We shouldn't call VG_(core_panic) or VG_(message) on the simulated
-   CPU.  Really we should pass the request in the usual way, and
-   Valgrind itself can do the panic.  Too tedious, however.  
-*/
-void pvalloc ( void )
-{ VG_(core_panic)("call to pvalloc\n"); }
-void malloc_stats ( void )
-{ VG_(core_panic)("call to malloc_stats\n"); }
 
-void malloc_trim ( void )
-{ VG_(core_panic)("call to malloc_trim\n"); }
-void malloc_get_state ( void )
-{ VG_(core_panic)("call to malloc_get_state\n"); }
-void malloc_set_state ( void )
-{ VG_(core_panic)("call to malloc_set_state\n"); }
+extern void _exit(int);
+
+static void panic(const char *str)
+{
+   VALGRIND_PRINTF_BACKTRACE("Program aborting because of call to %s", str);
+   
+   _exit(99);
+   *(int *)0 = 'x';
+}
+
+#define PANIC(x)				\
+   void x(void)					\
+   {						\
+      panic(#x);				\
+   }
+
+PANIC(pvalloc);
+PANIC(malloc_stats);
+PANIC(malloc_trim);
+PANIC(malloc_get_state);
+PANIC(malloc_set_state);
 
 
 /* Yet another ugly hack.  Cannot include <malloc.h> because we
@@ -351,7 +280,7 @@
    int keepcost; /* top-most, releasable (via malloc_trim) space */
 };
 
-struct mallinfo mallinfo ( void )
+LIBALIAS(struct mallinfo, mallinfo, ( void ))
 {
    /* Should really try to return something a bit more meaningful */
    UInt            i;
@@ -362,6 +291,83 @@
    return mi;
 }
 
+static const struct {
+   const Char *libname;
+   Addr		func;
+} replacements[] =
+{
+#define E(pfx, x)	{ pfx #x, (Addr)x }
+#define R(x)		E("", x), E("__libc_", x), E("__", x)
+
+   /* alloc */
+   R(malloc),
+   R(__builtin_new),
+   R(_Znwj),
+   R(_ZnwjRKSt9nothrow_t),	/* operator new(unsigned, std::nothrow_t const&) */
+   R(__builtin_vec_new),
+   R(_Znaj),
+   R(_ZnajRKSt9nothrow_t),	/* operator new[](unsigned, std::nothrow_t const&) */
+   R(calloc),
+   R(realloc),
+   R(memalign),
+   R(valloc),
+   R(cfree),
+   R(posix_memalign),
+
+   /* free */
+   R(free),
+   R(__builtin_delete),
+   R(_ZdlPv),
+   R(__builtin_vec_delete),
+   R(_ZdaPv),
+
+   /* misc */
+   R(mallopt),
+   R(malloc_usable_size),
+   R(mallinfo),
+
+   /* bad */
+   R(pvalloc),
+   R(malloc_stats),
+   R(malloc_trim),
+   R(malloc_get_state),
+   R(malloc_set_state),   
+#undef R
+#undef S
+#undef E
+};
+
+/* All the code in here is unused until this function is called */
+
+static void init(void)
+{
+   int i;
+   int res;
+
+   if (init_done)
+      return;
+
+   init_done = 1;
+
+   VALGRIND_MAGIC_SEQUENCE(res, -1, VG_USERREQ__GET_MALLOCFUNCS, &info, 0, 0, 0);
+
+   for(i = 0; i < sizeof(replacements)/sizeof(*replacements); i++) {
+#if 0
+      /* doesn't seem much point - ld-linux.so will have already used
+	 malloc/free before we run */
+      VALGRIND_MAGIC_SEQUENCE(res, 0, VG_USERREQ__REGISTER_REDIRECT_ADDR, 
+			      "soname:ld-linux.so.2", replacements[i].libname,
+			      replacements[i].func, 0);
+#endif
+      VALGRIND_MAGIC_SEQUENCE(res, 0, VG_USERREQ__REGISTER_REDIRECT_ADDR, 
+			      "soname:libc.so.6", replacements[i].libname,
+			      replacements[i].func, 0);
+      VALGRIND_MAGIC_SEQUENCE(res, 0, VG_USERREQ__REGISTER_REDIRECT_ADDR, 
+			      "soname:libstdc++*", replacements[i].libname,
+			      replacements[i].func, 0);
+   }
+}
+
 /*--------------------------------------------------------------------*/
 /*--- end                                      vg_replace_malloc.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c
index 67824e6..9d823e3 100644
--- a/coregrind/vg_scheduler.c
+++ b/coregrind/vg_scheduler.c
@@ -119,6 +119,10 @@
 
 typedef UInt ThreadKey;
 
+/* The scheduler needs to know the address of the client's
+   __libc_freeres wrapper so it can be called at program exit. */
+static Addr VG_(__libc_freeres_wrapper);
+
 
 UInt VG_(syscall_altered_shadow_reg);
 UInt VG_(signal_delivery_altered_shadow_reg);
@@ -594,9 +598,9 @@
    VG_(save_thread_state) ( tid_main );
 
    VG_(threads)[tid_main].stack_highest_word 
-      = VG_(foundstack_start) + VG_(foundstack_size) - 4;
-   VG_(threads)[tid_main].stack_base = VG_(foundstack_start);
-   VG_(threads)[tid_main].stack_size = VG_(foundstack_size);
+      = VG_(clstk_end) - 4;
+   VG_(threads)[tid_main].stack_base = VG_(clstk_base);
+   VG_(threads)[tid_main].stack_size = VG_(clstk_end) - VG_(clstk_base);
 
    /* So now ... */
    vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);
@@ -1096,15 +1100,16 @@
                   it hasn't been overridden with --run-libc-freeres=no
                   on the command line. */
 
-               if (VG_(needs).libc_freeres && VG_(clo_run_libc_freeres)) {
-
+               if (VG_(needs).libc_freeres && 
+		   VG_(clo_run_libc_freeres) &&
+		   VG_(__libc_freeres_wrapper) != 0) {
                   if (VG_(clo_verbosity) > 2 
                       || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) {
                      VG_(message)(Vg_DebugMsg, 
                         "Caught __NR_exit; running __libc_freeres()");
                   }
                   VG_(nuke_all_threads_except) ( tid );
-                  VG_(threads)[tid].m_eip = (UInt)(&VG_(__libc_freeres_wrapper));
+                  VG_(threads)[tid].m_eip = (UInt)VG_(__libc_freeres_wrapper);
                   vg_assert(VG_(threads)[tid].status == VgTs_Runnable);
                   goto stage1; /* party on, dudes (but not for much longer :) */
 
@@ -1858,8 +1863,9 @@
          assigning it for the first time. */
       vg_assert(VG_(threads)[tid].stack_size == 0);
       vg_assert(VG_(threads)[tid].stack_base == (Addr)NULL);
-      new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb, 
-                                                   "new thread stack" );
+      new_stack = VG_(client_alloc)(0, new_stk_szb, 
+				    VKI_PROT_READ | VKI_PROT_WRITE | VKI_PROT_EXEC, 
+				    SF_STACK);
       VG_(threads)[tid].stack_base = new_stack;
       VG_(threads)[tid].stack_size = new_stk_szb;
       VG_(threads)[tid].stack_highest_word
@@ -2880,27 +2886,40 @@
    UInt*        arg    = (UInt*)(VG_(threads)[tid].m_eax);
    UInt         req_no = arg[0];
 
-   /* VG_(printf)("req no = 0x%x\n", req_no); */
+   if (0)
+      VG_(printf)("req no = 0x%x\n", req_no);
    switch (req_no) {
 
       case VG_USERREQ__CLIENT_CALL0: {
          UInt (*f)(void) = (void*)arg[1];
-         SET_CLCALL_RETVAL(tid, f ( ), (Addr)f);
+	 if (f == NULL)
+	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL: func=%p\n", f);
+	 else
+	    SET_CLCALL_RETVAL(tid, f ( ), (Addr)f);
          break;
       }
       case VG_USERREQ__CLIENT_CALL1: {
          UInt (*f)(UInt) = (void*)arg[1];
-         SET_CLCALL_RETVAL(tid, f ( arg[2] ), (Addr)f );
+	 if (f == NULL)
+	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL: func=%p\n", f);
+	 else
+	    SET_CLCALL_RETVAL(tid, f ( arg[2] ), (Addr)f );
          break;
       }
       case VG_USERREQ__CLIENT_CALL2: {
          UInt (*f)(UInt, UInt) = (void*)arg[1];
-         SET_CLCALL_RETVAL(tid, f ( arg[2], arg[3] ), (Addr)f );
+	 if (f == NULL)
+	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL: func=%p\n", f);
+	 else
+	    SET_CLCALL_RETVAL(tid, f ( arg[2], arg[3] ), (Addr)f );
          break;
       }
       case VG_USERREQ__CLIENT_CALL3: {
          UInt (*f)(UInt, UInt, UInt) = (void*)arg[1];
-         SET_CLCALL_RETVAL(tid, f ( arg[2], arg[3], arg[4] ), (Addr)f );
+	 if (f == NULL)
+	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL: func=%p\n", f);
+	 else
+	    SET_CLCALL_RETVAL(tid, f ( arg[2], arg[3], arg[4] ), (Addr)f );
          break;
       }
 
@@ -3109,13 +3128,27 @@
       case VG_USERREQ__SIGNAL_RETURNS: 
          handle_signal_return(tid);
 	 break;
- 
+
+
+      case VG_USERREQ__GET_SIGRT_MIN:
+	 SET_PTHREQ_RETVAL(tid, VG_(sig_rtmin));
+	 break;
+
+      case VG_USERREQ__GET_SIGRT_MAX:
+	 SET_PTHREQ_RETVAL(tid, VG_(sig_rtmax));
+	 break;
+
+      case VG_USERREQ__ALLOC_RTSIG:
+	 SET_PTHREQ_RETVAL(tid, VG_(sig_alloc_rtsig)((Int)arg[1]));
+	 break;
+
       case VG_USERREQ__PRINTF: {
          int count = 
             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], (va_list)arg[2] );
             SET_CLREQ_RETVAL( tid, count );
          break; }
 
+
       case VG_USERREQ__INTERNAL_PRINTF: {
          int count = 
             VG_(vmessage)( Vg_UserMsg, (char *)arg[1], (va_list)arg[2] );
@@ -3138,6 +3171,46 @@
             SET_CLREQ_RETVAL( tid, count );
          break; }
 
+      case VG_USERREQ__REGISTER_LIBC_FREERES:
+	 VG_(__libc_freeres_wrapper)	= arg[1];
+         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
+	 break;
+
+      case VG_USERREQ__GET_MALLOCFUNCS: {
+	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
+
+	 info->sk_malloc	= (Addr)SK_(malloc);
+	 info->sk_calloc	= (Addr)SK_(calloc);
+	 info->sk_realloc	= (Addr)SK_(realloc);
+	 info->sk_memalign	= (Addr)SK_(memalign);
+	 info->sk___builtin_new	= (Addr)SK_(__builtin_new);
+	 info->sk___builtin_vec_new	= (Addr)SK_(__builtin_vec_new);
+	 info->sk_free		= (Addr)SK_(free);
+	 info->sk___builtin_delete	= (Addr)SK_(__builtin_delete);
+	 info->sk___builtin_vec_delete	= (Addr)SK_(__builtin_vec_delete);
+
+	 info->arena_payload_szB	= (Addr)VG_(arena_payload_szB);
+	 
+	 info->clo_sloppy_malloc	= VG_(clo_sloppy_malloc);
+	 info->clo_trace_malloc		= VG_(clo_trace_malloc);
+
+         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
+
+	 break;
+      }
+
+      case VG_USERREQ__REGISTER_REDIRECT_SYM: {
+	 VG_(add_redirect_sym)((const Char *)arg[1], (const Char *)arg[2],
+			       (const Char *)arg[3], (const Char *)arg[4]);
+	 break;
+      }
+
+      case VG_USERREQ__REGISTER_REDIRECT_ADDR: {
+	 VG_(add_redirect_addr)((const Char *)arg[1], (const Char *)arg[2],
+				(Addr)arg[3]);
+	 break;
+      }
+
       /* Requests from the client program */
 
       case VG_USERREQ__DISCARD_TRANSLATIONS:
@@ -3160,7 +3233,7 @@
 	    UInt ret;
 
             if (VG_(clo_verbosity) > 2)
-               VG_(printf)("client request: code %d,  addr %p,  len %d\n",
+               VG_(printf)("client request: code %x,  addr %p,  len %d\n",
                            arg[0], (void*)arg[1], arg[2] );
 
 	    if (SK_(handle_client_request) ( tid, arg, &ret ))
diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c
index ece604a..50be2b6 100644
--- a/coregrind/vg_signals.c
+++ b/coregrind/vg_signals.c
@@ -950,6 +950,8 @@
    sc->cr2 = (UInt)si->_sifields._sigfault._addr;
 }
 
+static Addr signalreturn_stub_addr = 0;
+
 /* Set up a stack frame (VgSigContext) for the client's signal
    handler.  This includes the signal number and a bogus return
    address.  */
@@ -1007,10 +1009,30 @@
    vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) 
               == ((Char*)(esp_top_of_frame)) );
 
+   /* if the sigreturn stub isn't in the client address space yet,
+      allocate space for it and copy it into place. */
+   if (signalreturn_stub_addr == 0) {
+      UInt len = PGROUNDUP(VG_(signalreturn_bogusRA_length));
+
+      signalreturn_stub_addr = VG_(client_alloc)(0, len,
+						 VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC,
+						 0);
+      VG_(memcpy)((void *)signalreturn_stub_addr, &VG_(signalreturn_bogusRA), 
+		  VG_(signalreturn_bogusRA_length));
+      VG_(mprotect)((void *)signalreturn_stub_addr, len, VKI_PROT_READ|VKI_PROT_EXEC);
+      VG_TRACK(new_mem_mmap, signalreturn_stub_addr, VG_(signalreturn_bogusRA_length),
+	       True, False, True);
+
+      if (VG_(clo_trace_signals))
+	 VG_(message)(Vg_DebugMsg, "Put sigreturn stub at %p-%p in client address space",
+		      signalreturn_stub_addr, 
+		      signalreturn_stub_addr + VG_(signalreturn_bogusRA_length));
+   }
+
    /* retaddr, sigNo, psigInfo, puContext fields are to be written */
    VG_TRACK( pre_mem_write, Vg_CoreSignal, tid, "signal handler frame", 
                             (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
-   frame->retaddr    = (UInt)(&VG_(signalreturn_bogusRA));
+   frame->retaddr    = (UInt)signalreturn_stub_addr;
    frame->sigNo      = sigNo;
    frame->sigNo_private = sigNo;
    VG_TRACK( post_mem_write, (Addr)frame, offsetof(VgSigFrame, handlerArgs) );
@@ -1323,6 +1345,10 @@
 
    vg_assert(!core || (core && terminate));
 
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg, "delivering %d to default handler %s%s",
+		   sigNo, terminate ? "terminate" : "", core ? "+core" : "");
+
    if (terminate) {
       if (VG_(clo_verbosity) != 0 && (core || VG_(clo_verbosity) > 1)) {
 	 VG_(message)(Vg_UserMsg, "");
@@ -1336,8 +1362,8 @@
 	    switch(sigNo) {
 	    case VKI_SIGSEGV:
 	       switch(info->si_code) {
-	       case 1: event = "Address not mapped to object"; break;
-	       case 2: event = "Invalid permissions for mapped object"; break;
+	       case 1: event = "Access not within mapped region"; break;
+	       case 2: event = "Bad permissions for mapped region"; break;
 	       }
 	       break;
 
@@ -1409,6 +1435,7 @@
    Int			sigNo = info->si_signo;
    vki_ksigset_t	handlermask;
    SCSS_Per_Signal	*handler = &vg_scss.scss_per_sig[sigNo];
+   void			*handler_fn;
    ThreadState		*tst = VG_(get_ThreadState)(tid);
 
    if (VG_(clo_trace_signals))
@@ -1461,14 +1488,19 @@
       }
    }
 
-   vg_assert(handler->scss_handler != VKI_SIG_IGN);
+   /* If the client specifies SIG_IGN, treat it as SIG_DFL */
+   handler_fn = handler->scss_handler;
+   if (handler_fn == VKI_SIG_IGN)
+      handler_fn = VKI_SIG_DFL;
+
+   vg_assert(handler_fn != VKI_SIG_IGN);
 
    if (sigNo == VKI_SIGCHLD && (handler->scss_flags & VKI_SA_NOCLDWAIT)) {
       //VG_(printf)("sigNo==SIGCHLD and app asked for NOCLDWAIT\n");
       vg_babyeater(sigNo, NULL, NULL);
    }
 
-   if (handler->scss_handler == VKI_SIG_DFL) {
+   if (handler_fn == VKI_SIG_DFL) {
       handlermask = tst->sig_mask; /* no change to signal mask */
       vg_default_action(info, tid);
    } else {
@@ -1604,8 +1636,7 @@
    */
 
    if (VG_(clo_trace_signals)) {
-      VG_(start_msg)(Vg_DebugMsg);
-      VG_(add_to_msg)("signal %d arrived ... ", sigNo );
+      VG_(message)(Vg_DebugMsg, "signal %d arrived ... si_code=%d", sigNo, info->si_code );
    }
    vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG);
 
@@ -1631,6 +1662,77 @@
    vg_assert((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&dummy_local));
    vg_assert((Char*)(&dummy_local) < (Char*)(&(VG_(sigstack)[VG_SIGSTACK_SIZE_W])));
 
+   /* Special fault-handling case. We can now get signals which can
+      act upon and immediately restart the faulting instruction.
+    */
+   if (info->si_signo == VKI_SIGSEGV) {
+      ThreadId tid = VG_(get_current_or_recent_tid)();
+      Addr fault = (Addr)info->_sifields._sigfault._addr;
+      Addr esp = VG_(is_running_thread)(tid) ?
+	 VG_(baseBlock)[VGOFF_(m_esp)] : VG_(threads)[tid].m_esp;
+      Segment *seg;
+
+      seg = VG_(find_segment)(fault);
+      if (seg != NULL)
+	 seg = VG_(next_segment)(seg);
+
+      if (VG_(clo_trace_signals)) {
+	 if (seg == NULL)
+	    VG_(message)(Vg_DebugMsg,
+			 "SIGSEGV: si_code=%d faultaddr=%p tid=%d esp=%p seg=NULL shad=%p-%p",
+			 info->si_code, fault, tid, esp,
+			 VG_(shadow_base), VG_(shadow_end));
+	 else
+	    VG_(message)(Vg_DebugMsg,
+			 "SIGSEGV: si_code=%d faultaddr=%p tid=%d esp=%p seg=%p-%p fl=%x shad=%p-%p",
+			 info->si_code, fault, tid, esp, seg->addr, seg->addr+seg->len, seg->flags,
+			 VG_(shadow_base), VG_(shadow_end));
+      }
+
+      if (info->si_code == 1		&&	/* SEGV_MAPERR */
+	  seg != NULL                   &&
+	  fault >= esp			&&
+	  fault < seg->addr		&&
+	  (seg->flags & SF_GROWDOWN)) {
+	 /* If the fault address is above esp but below the current known
+	    stack segment base, and it was a fault because there was
+	    nothing mapped there (as opposed to a permissions fault),
+	    then extend the stack segment. 
+	 */
+	 Addr base = PGROUNDDN(esp);
+	 Char *ret = VG_(mmap)((Char *)base, seg->addr - base, 
+			       VKI_PROT_READ | VKI_PROT_WRITE | VKI_PROT_EXEC,
+			       VKI_MAP_PRIVATE | VKI_MAP_FIXED | VKI_MAP_ANONYMOUS | VKI_MAP_CLIENT,
+			       -1, 0);
+	 if ((Addr)ret == base) {
+	    VG_(map_segment)(base, seg->addr - base,
+			     VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC,
+			     SF_STACK|SF_GROWDOWN);
+	    return;		/* restart instruction */
+	 }
+	 /* Otherwise fall into normal signal handling */
+      } else if (info->si_code == 2 && /* SEGV_ACCERR */
+		 VG_(needs).shadow_memory &&
+		 VG_(is_shadow_addr)(fault)) {
+	 /* If there's a fault within the shadow memory range, and it
+	    is a permissions fault, then it means that the client is
+	    using some memory which had not previously been used.
+	    This catches those faults, makes the memory accessible,
+	    and calls the skin to initialize that page.
+	 */
+	 static Int recursion = 0;
+
+	 if (recursion++ == 0) {
+	    VG_(init_shadow_range)(PGROUNDDN(fault), VKI_BYTES_PER_PAGE, True);
+	    recursion--;
+	    return;
+	 } else {
+	    /* otherwise fall into normal SEGV handling */	    
+	    recursion--;
+	 }
+      }
+   }
+
    if (VG_(scheduler_jmpbuf_valid)) {
       /* Can't continue; must longjmp back to the scheduler and thus
          enter the sighandler immediately. */
@@ -1661,13 +1763,26 @@
 	 it was actually generated by Valgrind internally.
        */
       struct vki_sigcontext *sc = &uc->uc_mcontext;
+      Char buf[1024];
 
       VG_(message)(Vg_DebugMsg, 
 		   "INTERNAL ERROR: Valgrind received a signal %d (%s) - exiting",
 		   sigNo, signame(sigNo));
+
+      buf[0] = 0;
+      if (1 && !VG_(get_fnname)(sc->eip, buf+2, sizeof(buf)-5)) {
+	 Int len;
+
+	 buf[0] = ' ';
+	 buf[1] = '(';
+	 len = VG_(strlen)(buf);
+	 buf[len] = ')';
+	 buf[len+1] = '\0';
+      }
+
       VG_(message)(Vg_DebugMsg, 
-		   "si_code=%x Fault EIP: %p; Faulting address: %p",
-		   info->si_code, sc->eip, info->_sifields._sigfault._addr);
+		   "si_code=%x Fault EIP: %p%s; Faulting address: %p",
+		   info->si_code, sc->eip, buf, info->_sifields._sigfault._addr);
 
       if (0)
 	 VG_(kill_self)(sigNo);		/* generate a core dump */
diff --git a/coregrind/vg_stabs.c b/coregrind/vg_stabs.c
index 3ca4687..1df65b6 100644
--- a/coregrind/vg_stabs.c
+++ b/coregrind/vg_stabs.c
@@ -300,7 +300,7 @@
    return &sf->types[sym];
 }
 
-static inline Bool isdigit(Char c, Int base, Int *v)
+static Bool isdigit(Char c, Int base, Int *v)
 {
    switch(base) {
    case 10:
diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S
index d8bc4da..c5fe674 100644
--- a/coregrind/vg_startup.S
+++ b/coregrind/vg_startup.S
@@ -32,139 +32,9 @@
 #include "vg_constants.h"
 #include "config.h"
 
-
-#---------------------------------------------------------------------
-#
-# Startup and shutdown code for Valgrind.  Particularly hairy.
-#
-# The dynamic linker, ld.so, will run the contents of the .init
-# section, once it has located, mmap-d and and linked the shared
-# libraries needed by the program.  Valgrind is itself a shared
-# library.  ld.so then runs code in the .init sections of each
-# library in turn, in order to give them a chance to initialise
-# themselves.  We hijack this mechanism.  Our startup routine
-# does return -- and execution continues -- except on the
-# synthetic CPU, not the real one.  But ld.so, and the program
-# it is starting, cant tell the difference.
-#
-# The management apologise for the lack of apostrophes in these
-# comments.  GNU as seems to object to them, for some reason.
-
-
-.section .init
-	call VG_(startup)
-.section .fini
-	call VG_(shutdown)
-
-.section .data
-valgrind_already_initted:
-	.word	0
-	
 .section .text
 	
 
-.global VG_(startup)
-VG_(startup):
-	pushfl
-	cmpl	$0, valgrind_already_initted
-	je	really_start_up
-	popfl
-	ret
-
-really_start_up:
-	popfl
-	movl	$1, valgrind_already_initted
-	
-        # Record %esp as it was when we got here.  This is because argv/c
-	# and envp[] are passed as args to this function, and we need to see
-	# envp so we can get at the env var VG_ARGS without help from libc.
-	# The stack layout at this point depends on the version of glibc in
-	# use.  See process_cmd_line_options() in vg_main.c for details.
-        movl    %esp, VG_(esp_at_startup)
-        
-	# We have control!  Save the state of the machine in
-	# the simulators state, and switch stacks.
-	# Except ... we cant copy the machines registers into their
-	# final places in vg_baseBlock, because the offsets to them
-	# have not yet been set up.  Instead, they are copied to a
-	# temporary place (m_state_static).  In vg_main.c, once the
-	# baseBlock offsets are set up, values are copied into baseBlock.
-	movw	%cs, VG_(m_state_static)+0
-	movw	%ss, VG_(m_state_static)+4
-	movw	%ds, VG_(m_state_static)+8
-	movw	%es, VG_(m_state_static)+12
-	movw	%fs, VG_(m_state_static)+16
-	movw	%gs, VG_(m_state_static)+20
-	movl	%eax, VG_(m_state_static)+24
-	movl	%ecx, VG_(m_state_static)+28
-	movl	%edx, VG_(m_state_static)+32
-	movl	%ebx, VG_(m_state_static)+36
-	movl	%esp, VG_(m_state_static)+40
-	movl	%ebp, VG_(m_state_static)+44
-	movl	%esi, VG_(m_state_static)+48
-	movl	%edi, VG_(m_state_static)+52
-	pushfl
-	popl	%eax
-	movl	%eax, VG_(m_state_static)+56
-
-	# now weve captured all the integer registers and
-	# flags, figure out whether this is an sse-enabled
-	# cpu or not.
-	movb	$0, VG_(have_ssestate)	# assume sse-disabled
-	movl	$0, %eax
-	cpuid
-	cmpl	$1, %eax
-	jl	get_fpu		# we cant do cpuid(1) ?!
-	movl	$1, %eax
-	cpuid
-	testl	$(1<<25), %edx
-	jz	get_fpu		# edx bit 25 is set iff sse
-	# well, it looks like were sse-enabled
-	movb	$1, VG_(have_ssestate)
-
-	# next, capture the FPU/SSE state
-get_fpu:
-	fwait
-
-	pushfl
-	cmpb	$0, VG_(have_ssestate)
-	jz	qq3nosse
-	fxsave	VG_(m_state_static)+64
-	andl	$0x0000FFBF, VG_(m_state_static)+64+24
-	fxrstor	VG_(m_state_static)+64
-	jmp	qq3merge
-qq3nosse:
-	fnsave	VG_(m_state_static)+64
-	frstor	VG_(m_state_static)+64
-qq3merge:
-	popfl
-
-	# keep the first and last 10 words free to check for overruns	
-	movl	$VG_(stack)+39996 -40, %esp
-
-	# Now some real magic.  We need this procedure to return,
-	# since thats what ld.so expects, but running on the
-	# simulator.  So vg_main starts the simulator running at
-	# the insn labelled first_insn_to_simulate.
-
-	movl	$first_insn_to_simulate, VG_(m_state_static)+60
-	jmp	VG_(main)
-first_insn_to_simulate:
-	# Nothing else to do -- just return in the "normal" way.
-	ret
-
-
-
-VG_(shutdown):
-	# Just return, and ignore any attempt by ld.so to call
-	# valgrind.sos exit function.  We just run the client all
-	# the way to the final exit() syscall.  This sidesteps
-	# problems caused by ld.so calling the finalisation code
-	# of other .sos *after* it shuts down valgrind, which
-	# was causing big problems with threads.
-	ret
-
-	
 	
 .global	VG_(switch_to_real_CPU)
 VG_(switch_to_real_CPU):
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index 45a48a1..6913303 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -474,7 +474,7 @@
   cleanup_more:
  
    /* If two symbols have identical address ranges, favour the
-      one with the longer name. 
+      one with the longer name (unless the extra length is junk)
    */
    do {
       n_merged = 0;
@@ -717,6 +717,35 @@
 /*--- Read info from a .so/exe file.                       ---*/
 /*------------------------------------------------------------*/
 
+Bool VG_(is_object_file)(const void *buf)
+{
+   {
+      Elf32_Ehdr *ehdr = (Elf32_Ehdr *)buf;
+      Int ok = 1;
+
+      ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
+             && ehdr->e_ident[EI_MAG1] == 'E'
+             && ehdr->e_ident[EI_MAG2] == 'L'
+             && ehdr->e_ident[EI_MAG3] == 'F');
+      ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
+             && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
+             && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
+      ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
+      ok &= (ehdr->e_machine == EM_386);
+      ok &= (ehdr->e_version == EV_CURRENT);
+      ok &= (ehdr->e_shstrndx != SHN_UNDEF);
+      ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
+      ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0);
+
+      if (ok)
+	 return True;
+   }
+
+   /* other file formats here? */
+
+   return False;
+}
+
 /* Read a symbol table (normal or dynamic) */
 static
 void read_symtab( SegInfo* si, Char* tab_name,
@@ -848,7 +877,6 @@
       name = VG_(addStr) ( si, t0, -1 );
       vg_assert(name != NULL
                 /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ );
-      vg_assert( (Int)sym->st_value >= 0);
       /* VG_(printf)("%p + %d:   %p %s\n", si->start, 
                   (Int)sym->st_value, sym_addr,  t0 ); */
       risym.addr  = sym_addr;
@@ -877,7 +905,7 @@
 
    oimage = (Addr)NULL;
    if (VG_(clo_verbosity) > 1)
-      VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
+      VG_(message)(Vg_UserMsg, "Reading syms from %s (%p)", si->filename, si->start );
 
    /* mmap the object image aboard, so that we can read symbols and
       line number info out of it.  It will be munmapped immediately
@@ -897,7 +925,8 @@
    }
 
    oimage = (Addr)VG_(mmap)( NULL, n_oimage, 
-                             VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 );
+                             VKI_PROT_READ, VKI_MAP_PRIVATE|VKI_MAP_NOSYMS, fd, 0 );
+
    VG_(close)(fd);
 
    if (oimage == ((Addr)(-1))) {
@@ -913,21 +942,8 @@
    ok = (n_oimage >= sizeof(Elf32_Ehdr));
    ehdr = (Elf32_Ehdr*)oimage;
 
-   if (ok) {
-      ok &= (ehdr->e_ident[EI_MAG0] == 0x7F
-             && ehdr->e_ident[EI_MAG1] == 'E'
-             && ehdr->e_ident[EI_MAG2] == 'L'
-             && ehdr->e_ident[EI_MAG3] == 'F');
-      ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32
-             && ehdr->e_ident[EI_DATA] == ELFDATA2LSB
-             && ehdr->e_ident[EI_VERSION] == EV_CURRENT);
-      ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN);
-      ok &= (ehdr->e_machine == EM_386);
-      ok &= (ehdr->e_version == EV_CURRENT);
-      ok &= (ehdr->e_shstrndx != SHN_UNDEF);
-      ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0);
-      ok &= (ehdr->e_phoff != 0 && ehdr->e_phnum != 0);
-   }
+   if (ok)
+      ok &= VG_(is_object_file)(ehdr);
 
    if (!ok) {
       VG_(symerr)("Invalid ELF header, or missing stringtab/sectiontab.");
@@ -945,6 +961,7 @@
    {
       Bool offset_set = False;
       Elf32_Addr prev_addr = 0;
+      Addr baseaddr = 0;
 
       si->offset = 0;
 
@@ -954,12 +971,37 @@
 
 	 o_phdr = &((Elf32_Phdr *)(oimage + ehdr->e_phoff))[i];
 
+	 if (o_phdr->p_type == PT_DYNAMIC && si->soname == NULL) {
+	    const Elf32_Dyn *dyn = (const Elf32_Dyn *)(oimage + o_phdr->p_offset);
+	    Int stroff = -1;
+	    Char *strtab = NULL;
+	    Int j;
+	    
+	    for(j = 0; dyn[j].d_tag != DT_NULL; j++) {
+	       switch(dyn[j].d_tag) {
+	       case DT_SONAME:
+		  stroff =  dyn[j].d_un.d_val;
+		  break;
+
+	       case DT_STRTAB:
+		  strtab = (Char *)oimage + dyn[j].d_un.d_ptr - baseaddr;
+		  break;
+	       }
+	    }
+
+	    if (stroff != -1 && strtab != 0) {
+	       TRACE_SYMTAB("soname=%s\n", strtab+stroff);
+	       si->soname = VG_(arena_strdup)(VG_AR_SYMTAB, strtab+stroff);
+	    }
+	 }
+
 	 if (o_phdr->p_type != PT_LOAD)
 	    continue;
 
 	 if (!offset_set) {
 	    offset_set = True;
 	    si->offset = si->start - o_phdr->p_vaddr;
+	    baseaddr = o_phdr->p_vaddr;
 	 }
 
 	 if (o_phdr->p_vaddr < prev_addr) {
@@ -990,10 +1032,31 @@
 	     (mapped_end > (si->start+si->size))) {
 	    UInt newsz = mapped_end - si->start;
 	    if (newsz > si->size) {
+	       Segment *seg;
+
 	       if (0)
 		  VG_(printf)("extending mapping %p..%p %d -> ..%p %d\n", 
 			      si->start, si->start+si->size, si->size,
 			      si->start+newsz, newsz);
+
+	       for(seg = VG_(find_segment)(si->start);
+		   seg != NULL && VG_(seg_overlaps)(seg, si->start, si->size); 
+		   seg = VG_(next_segment)(seg)) {
+		  if (seg->symtab == si)
+		     continue;
+
+		  if (seg->symtab != NULL)
+		     VG_(symtab_decref)(seg->symtab, seg->addr, seg->len);
+
+		  VG_(symtab_incref)(si);
+		  seg->symtab = si;
+		  
+		  if (0)
+		     VG_(printf)("adding symtab %p (%p-%p) to segment %p (%p-%p)\n",
+				 si, si->start, si->start+newsz,
+				 seg, seg->addr, seg->addr+seg->len);
+	       }
+	       
 	       si->size = newsz;
 	    }
 	 }
@@ -1128,54 +1191,27 @@
 */
 static SegInfo* segInfo = NULL;
 
-void VG_(read_seg_symbols) ( Addr start, UInt size, 
-                             Char rr, Char ww, Char xx, 
-                             UInt foffset, UChar* filename )
+static void resolve_seg_redirs(SegInfo *si);
+
+SegInfo *VG_(read_seg_symbols) ( Segment *seg )
 {
    SegInfo* si;
 
-   /* Stay sane ... */
-   if (size == 0)
-      return;
-
-   /* We're only interested in collecting symbols in executable
-      segments which are associated with a real file.  Hence: */
-   if (filename == NULL || xx != 'x')
-      return;
-   if (0 == VG_(strcmp)(filename, "/dev/zero"))
-      return;
-   if (foffset != 0)
-      return;
+   vg_assert(seg->symtab == NULL);
 
    VGP_PUSHCC(VgpReadSyms);
 
-   /* Perhaps we already have this one?  If so, skip. */
-   for (si = segInfo; si != NULL; si = si->next) {
-      /*
-      if (0==VG_(strcmp)(si->filename, filename)) 
-         VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n", 
-                     rr,ww,xx,si->start,si->size,start,size,filename);
-      */
-      /* For some reason the observed size of a mapping can change, so
-         we don't use that to determine uniqueness. */
-      if (si->start == start
-          /* && si->size == size */
-          && 0==VG_(strcmp)(si->filename, filename)) 
-      {
-         VGP_POPCC(VgpReadSyms);
-         return;
-      }
-   }
-
    /* Get the record initialised right. */
    si = VG_(arena_malloc)(VG_AR_SYMTAB, sizeof(SegInfo));
 
    VG_(memset)(si, 0, sizeof(*si));
-   si->start    = start;
-   si->size     = size;
-   si->foffset  = foffset;
-   si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename));
-   VG_(strcpy)(si->filename, filename);
+   si->start    = seg->addr;
+   si->size     = seg->len;
+   si->foffset  = seg->offset;
+   si->filename = VG_(arena_malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(seg->filename));
+   VG_(strcpy)(si->filename, seg->filename);
+
+   si->ref = 1;
 
    si->symtab = NULL;
    si->symtab_size = si->symtab_used = 0;
@@ -1185,6 +1221,8 @@
    si->scopetab = NULL;
    si->scopetab_size = si->scopetab_used = 0;
 
+   si->seg = seg;
+
    si->stab_typetab = NULL;
 
    si->plt_start  = si->plt_size  = 0;
@@ -1209,28 +1247,16 @@
       canonicaliseSymtab ( si );
       canonicaliseLoctab ( si );
       canonicaliseScopetab ( si );
+
+      /* do redirects */
+      resolve_seg_redirs( si );
    }
    VGP_POPCC(VgpReadSyms);
+
+   return si;
 }
 
 
-/* This one really is the Head Honcho.  Update the symbol tables to
-   reflect the current state of /proc/self/maps.  Rather than re-read
-   everything, just read the entries which are not already in segInfo.
-   So we can call here repeatedly, after every mmap of a non-anonymous
-   segment with execute permissions, for example, to pick up new
-   libraries as they are dlopen'd.  Conversely, when the client does
-   munmap(), vg_symtab_notify_munmap() throws away any symbol tables
-   which happen to correspond to the munmap()d area.  */
-void VG_(read_all_symbols) ( void )
-{
-   /* 9 July 2003: In order to work around PLT bypassing in
-      glibc-2.3.2 (see below VG_(setup_code_redirect_table)), we need
-      to load debug info regardless of the skin, unfortunately.  */
-   VG_(read_procselfmaps)  ( );
-   VG_(parse_procselfmaps) ( VG_(read_seg_symbols) );
-}
-
 /* When an munmap() call happens, check to see whether it corresponds
    to a segment for a .so, and if so discard the relevant SegInfo.
    This might not be a very clever idea from the point of view of
@@ -1256,8 +1282,8 @@
 
    if (VG_(clo_verbosity) > 1)
       VG_(message)(Vg_UserMsg, 
-                   "discard syms in %s due to munmap()", 
-                   curr->filename ? curr->filename : (Char *)"???");
+                   "discard syms at %p-%p in %s due to munmap()", 
+                   start, start+length, curr->filename ? curr->filename : (Char *)"???");
 
    vg_assert(prev == NULL || prev->next == curr);
 
@@ -1271,6 +1297,18 @@
    return;
 }
 
+void VG_(symtab_decref)(SegInfo *si, Addr start, UInt len)
+{
+   vg_assert(si->ref >= 1);
+   if (--si->ref == 0)
+      VG_(unload_symbols)(si->start, si->size);
+}
+
+void VG_(symtab_incref)(SegInfo *si)
+{
+   vg_assert(si->ref > 0);
+   si->ref++;
+}
 
 /*------------------------------------------------------------*/
 /*--- Use of symbol table & location info to create        ---*/
@@ -1310,8 +1348,8 @@
    table is designed we have no option but to do a complete linear
    scan of the table.  Returns NULL if not found. */
 
-static Addr reverse_search_one_symtab ( SegInfo* si,
-                                        Char* name )
+static Addr reverse_search_one_symtab ( const SegInfo* si,
+                                        const Char* name )
 {
    UInt i;
    for (i = 0; i < si->symtab_used; i++) {
@@ -1334,19 +1372,25 @@
 {
    Int      sno;
    SegInfo* si;
+   Segment *s;
 
    VGP_PUSHCC(VgpSearchSyms);
+
+   s = VG_(find_segment)(ptr);
+
+   if (s == NULL || !VG_(seg_overlaps)(s, ptr, 0) || s->symtab == NULL)
+      goto not_found;
    
-   for (si = segInfo; si != NULL; si = si->next) {
-      if (si->start <= ptr && ptr < si->start+si->size) {
-         sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
-         if (sno == -1) goto not_found;
-         *symno = sno;
-         *psi = si;
-         VGP_POPCC(VgpSearchSyms);
-         return;
-      }
-   }
+   si = s->symtab;
+
+   sno = search_one_symtab ( si, ptr, match_anywhere_in_fun );
+   if (sno == -1) goto not_found;
+   
+   *symno = sno;
+   *psi = si;
+   VGP_POPCC(VgpSearchSyms);
+   return;
+
   not_found:
    *psi = NULL;
    VGP_POPCC(VgpSearchSyms);
@@ -1864,84 +1908,259 @@
 
 
 /*------------------------------------------------------------*/
-/*--- Find interesting glibc entry points.                 ---*/
+/*--- General purpose redirection.                         ---*/
 /*------------------------------------------------------------*/
 
-CodeRedirect VG_(code_redirect_table)[VG_N_CODE_REDIRECTS];
+/* resolved redirections, indexed by from_addr */
+typedef struct _CodeRedirect {
+   const Char	*from_lib;	/* library qualifier pattern */
+   const Char	*from_sym;	/* symbol */
+   Addr		from_addr;	/* old addr */
 
-Int VG_(setup_code_redirect_table) ( void )
+   const Char	*to_lib;	/* library qualifier pattern */
+   const Char	*to_sym;	/* symbol */
+   Addr		to_addr;	/* new addr */
+
+   struct _CodeRedirect *next;	/* next pointer on unresolved list */
+} CodeRedirect;
+
+static Int addrcmp(const void *ap, const void *bp)
 {
-#  define N_SUBSTS 6
+   Addr a = *(Addr *)ap;
+   Addr b = *(Addr *)bp;
+   Int ret;
 
-   Int     i, j;
-   Addr    a_libc, a_pth;
-   SegInfo *si, *si_libc, *si_pth;
+   if (a == b)
+      ret = 0;
+   else
+      ret = (a < b) ? -1 : 1;
 
-   /* Original entry points to look for in libc. */
-   static Char* libc_names[N_SUBSTS]
-     = { "__GI___errno_location"
-       , "__errno_location"
-       , "__GI___h_errno_location"
-       , "__h_errno_location"
-       , "__GI___res_state" 
-       , "__res_state"
-       };
+   return ret;
+}
 
-   /* Corresponding substitute address in our pthread lib. */
-   static Char* pth_names[N_SUBSTS]
-     = { "__errno_location"
-       , "__errno_location"
-       , "__h_errno_location"
-       , "__h_errno_location"
-       , "__res_state" 
-       , "__res_state"
-       };
+static Char *straddr(void *p)
+{
+   static Char buf[16];
 
-   /* Look for the SegInfo for glibc and our pthread library. */
+   VG_(sprintf)(buf, "%p", *(Addr *)p);
 
-   si_libc = si_pth = NULL;
+   return buf;
+}
 
-   for (si = segInfo; si != NULL; si = si->next) {
-      if (VG_(strstr)(si->filename, "/libc-2.2.93.so")
-          || VG_(strstr)(si->filename, "/libc-2.3.1.so")
-          || VG_(strstr)(si->filename, "/libc-2.3.2.so")
-          || VG_(strstr)(si->filename, "/libc.so"))
-         si_libc = si;
-      if (VG_(strstr)(si->filename, "/libpthread.so"))
-         si_pth = si;
+static SkipList sk_resolved_redir = SKIPLIST_INIT(CodeRedirect, from_addr, 
+						  addrcmp, straddr, VG_AR_SYMTAB);
+static CodeRedirect *unresolved_redir = NULL;
+
+static Bool match_lib(const Char *pattern, const SegInfo *si)
+{
+   /* pattern == NULL matches everything, otherwise use globbing
+
+      If the pattern starts with:
+	file:, then match filename
+	soname:, then match soname
+	something else, match filename
+   */
+   const Char *name = si->filename;
+
+   if (pattern == NULL)
+      return True;
+
+   if (VG_(strncmp)(pattern, "file:", 5) == 0) {
+      pattern += 5;
+      name = si->filename;
+   }
+   if (VG_(strncmp)(pattern, "soname:", 7) == 0) {
+      pattern += 7;
+      name = si->soname;
    }
 
-   if (si_libc == NULL || si_pth == NULL) 
-      return 0;
+   if (name == NULL)
+      return False;
+   
+   return VG_(string_match)(pattern, name);
+}
 
-   /* Build the substitution table. */
-   vg_assert(N_SUBSTS <= VG_N_CODE_REDIRECTS-1);
+/* Resolve a redir using si if possible, and add it to the resolved
+   list */
+static Bool resolve_redir(CodeRedirect *redir, const SegInfo *si)
+{
+   Bool resolved;
+   static const Bool verbose = False;
 
-   j = 0;
-   VG_(code_redirect_table)[j].entry_pt_orig = 0; 
+   vg_assert(si != NULL);
+   vg_assert(si->seg != NULL);
 
-   for (i = 0; i < N_SUBSTS; i++) {
-      a_libc = reverse_search_one_symtab(si_libc, libc_names[i]);
-      a_pth  = reverse_search_one_symtab(si_pth,  pth_names[i]);
-      if (a_libc == 0 || a_pth == 0)
-         continue;
-      /* We've found a substitution pair. */
-      VG_(code_redirect_table)[j].entry_pt_orig  = a_libc;
-      VG_(code_redirect_table)[j].entry_pt_subst = a_pth;
-      j++;
-      vg_assert(j < VG_N_CODE_REDIRECTS);
-      /* Set end marker. */
-      VG_(code_redirect_table)[j].entry_pt_orig = 0; 
+   /* no redirection from Valgrind segments */
+   if (si->seg->flags & SF_VALGRIND)
+      return False;
+
+   resolved = (redir->from_addr != 0) && (redir->to_addr != 0);
+
+   if (verbose)
+      VG_(printf)("trying to resolve %s:%s / %s:%s against %s:%s\n",
+		  redir->from_lib, redir->from_sym,
+		  redir->to_lib, redir->to_sym,
+		  si->filename, si->soname);
+
+   vg_assert(!resolved);
+
+   if (redir->from_addr == 0) {
+      vg_assert(redir->from_sym != NULL);
+
+      if (match_lib(redir->from_lib, si)) {
+	 redir->from_addr = reverse_search_one_symtab(si, redir->from_sym);
+	 if (verbose)
+	    VG_(printf)("match lib %s passed; from_addr=%p\n", 
+			redir->from_lib, redir->from_addr);
+      }
+   }
+
+   if (redir->to_addr == 0) {
+      vg_assert(redir->to_sym != NULL);
+
+      if (match_lib(redir->to_lib, si)) {
+	 redir->to_addr = reverse_search_one_symtab(si, redir->to_sym);
+	 if (verbose)
+	    VG_(printf)("match lib %s passed; to_addr=%p\n", 
+			redir->to_lib, redir->to_addr);
+      }
+   }
+
+   resolved = (redir->from_addr != 0) && (redir->to_addr != 0);
+
+   if (verbose)
+      VG_(printf)("resolve_redir: %s:%s from=%p %s:%s to=%p\n",
+		  redir->from_lib, redir->from_sym, redir->from_addr, 
+		  redir->to_lib, redir->to_sym, redir->to_addr);
+
+   if (resolved) {
+      if (VG_(clo_verbosity) > 2 || verbose) {
+	 VG_(message)(Vg_DebugMsg, "redir resolved (%s:%s=%p -> ",
+		      redir->from_lib, redir->from_sym, redir->from_addr);
+	 VG_(message)(Vg_DebugMsg, "                %s:%s=%p)",
+		      redir->to_lib, redir->to_sym, redir->to_addr);
+      }
+      
+      if (VG_(search_transtab)(redir->from_addr) != 0)
+	 VG_(message)(Vg_DebugMsg, "!!!! adding redirect to already called function %s (%p -> %p)!!!",
+		      redir->from_sym, redir->from_addr, redir->to_addr);
+
+      VG_(SkipList_Insert)(&sk_resolved_redir, redir);
+   }
+
+   return resolved;
+}
+
+/* Go through the complete redir list, resolving as much as possible with this SegInfo.
+
+    This should be called when a new SegInfo symtab is loaded.
+ */
+static void resolve_seg_redirs(SegInfo *si)
+{
+   CodeRedirect **prevp = &unresolved_redir;
+   CodeRedirect *redir, *next;
+
+   /* visit each unresolved redir - if it becomes resolved, then
+      remove it from the unresolved list */
+   for(redir = unresolved_redir; redir != NULL; redir = next) {
+      next = redir->next;
+
+      if (resolve_redir(redir, si)) {
+	 *prevp = next;
+	 redir->next = NULL;
+      } else
+	 prevp = &redir->next;
+   }
+}
+
+static Bool resolve_redir_allsegs(CodeRedirect *redir)
+{
+   SegInfo *si;
+
+   for(si = segInfo; si != NULL; si = si->next)
+      if (resolve_redir(redir, si))
+	 return True;
+
+   return False;
+}
+
+/* Redirect a lib/symbol reference to a function at lib/symbol */
+void VG_(add_redirect_sym)(const Char *from_lib, const Char *from_sym,
+			   const Char *to_lib, const Char *to_sym)
+{
+   CodeRedirect *redir = VG_(SkipNode_Alloc)(&sk_resolved_redir);
+
+   redir->from_lib = VG_(arena_strdup)(VG_AR_SYMTAB, from_lib);
+   redir->from_sym = VG_(arena_strdup)(VG_AR_SYMTAB, from_sym);
+   redir->from_addr = 0;
+
+   redir->to_lib = VG_(arena_strdup)(VG_AR_SYMTAB, to_lib);
+   redir->to_sym = VG_(arena_strdup)(VG_AR_SYMTAB, to_sym);
+   redir->to_addr = 0;
+
+   if (!resolve_redir_allsegs(redir)) {
+      /* can't resolve immediately; add to list */
+      redir->next = unresolved_redir;
+      unresolved_redir = redir;
+   }
+}
+
+/* Redirect a lib/symbol reference to a function at lib/symbol */
+void VG_(add_redirect_addr)(const Char *from_lib, const Char *from_sym,
+			    Addr to_addr)
+{
+   CodeRedirect *redir = VG_(SkipNode_Alloc)(&sk_resolved_redir);
+
+   redir->from_lib = VG_(arena_strdup)(VG_AR_SYMTAB, from_lib);
+   redir->from_sym = VG_(arena_strdup)(VG_AR_SYMTAB, from_sym);
+   redir->from_addr = 0;
+
+   redir->to_lib = NULL;
+   redir->to_sym = NULL;
+   redir->to_addr = to_addr;
+
+   if (!resolve_redir_allsegs(redir)) {
+      /* can't resolve immediately; add to list */
+      redir->next = unresolved_redir;
+      unresolved_redir = redir;
+   }
+}
+
+Addr VG_(code_redirect)(Addr a)
+{
+   CodeRedirect *r = VG_(SkipList_Find)(&sk_resolved_redir, &a);
+
+   if (r == NULL || r->from_addr != a)
+      return a;
+
+   vg_assert(r->to_addr != 0);
+
+   return r->to_addr;
+}
+
+void VG_(setup_code_redirect_table) ( void )
+{
+   static const struct {
+      const Char *from, *to;
+   } redirects[] = {
+      { "__GI___errno_location",	"__errno_location"	},
+      { "__errno_location",		"__errno_location"	},
+      { "__GI___h_errno_location",	"__h_errno_location"	},
+      { "__h_errno_location",		"__h_errno_location"	},
+      { "__GI___res_state",		"__res_state"		},
+      { "__res_state",			"__res_state"		},
+   };
+   Int i;
+
+   for(i = 0; i < sizeof(redirects)/sizeof(*redirects); i++) {
+      VG_(add_redirect_sym)("soname:libc.so.6",		redirects[i].from,
+			    "soname:libpthread.so.0",	redirects[i].to);
+
       if (VG_(clo_verbosity) >= 2)
          VG_(message)(Vg_UserMsg, 
-            "REPLACING libc(%s) with libpthread(%s)",
-            libc_names[i], pth_names[i]
-         );
+		      "REPLACING libc(%s) with libpthread(%s)",
+		      redirects[i].from, redirects[i].to);
    }
-
-   return j;
-
-#  undef N_SUBSTS
 }
 
 /*------------------------------------------------------------*/
diff --git a/coregrind/vg_symtab2.h b/coregrind/vg_symtab2.h
index 36f2c57..178a9d0 100644
--- a/coregrind/vg_symtab2.h
+++ b/coregrind/vg_symtab2.h
@@ -120,12 +120,18 @@
 /* A structure which contains information pertaining to one mapped
    text segment. (typedef in vg_skin.h) */
 struct _SegInfo {
-   struct _SegInfo* next;
+   struct _SegInfo* next;	/* list of SegInfos */
+
+   Segment	*seg;		/* first segment we're mapped out of */
+   Int		ref;
+
    /* Description of the mapped segment. */
    Addr   start;
    UInt   size;
    Char*  filename; /* in mallocville */
    UInt   foffset;
+   Char*  soname;
+
    /* An expandable array of symbols. */
    RiSym* symtab;
    UInt   symtab_used;
diff --git a/coregrind/vg_syscalls.c b/coregrind/vg_syscalls.c
index 0d16dc1..91f84f7 100644
--- a/coregrind/vg_syscalls.c
+++ b/coregrind/vg_syscalls.c
@@ -137,18 +137,30 @@
 static 
 void mash_addr_and_len( Addr* a, UInt* len)
 {
-   while (( *a         % VKI_BYTES_PER_PAGE) > 0) { (*a)--; (*len)++; }
-   while (((*a + *len) % VKI_BYTES_PER_PAGE) > 0) {         (*len)++; }
+   Addr ra;
+   
+   ra = PGROUNDDN(*a);
+   *len = PGROUNDUP(*a + *len) - ra;
+   *a = ra;
 }
 
 static
-void mmap_segment ( Addr a, UInt len, UInt prot, Int fd )
+void mmap_segment ( Addr a, UInt len, UInt prot, UInt mm_flags, Int fd, ULong offset )
 {
    Bool rr, ww, xx;
+   UInt flags;
 
-   /* Records segment, reads debug symbols if necessary */
-   if ((prot & PROT_EXEC) && fd != -1)
-      VG_(new_exeseg_mmap) ( a, len );
+   flags = SF_MMAP;
+   
+   if (mm_flags & VKI_MAP_FIXED)
+      flags |= SF_FIXED;
+   if (!(mm_flags & VKI_MAP_PRIVATE))
+      flags |= SF_SHARED;
+
+   if (fd != -1)
+      flags |= SF_FILE;
+
+   VG_(map_fd_segment)(a, len, prot, flags, fd, offset, NULL);
 
    rr = prot & PROT_READ;
    ww = prot & PROT_WRITE;
@@ -164,19 +176,15 @@
       Addr orig_len = len; */
 
    mash_addr_and_len(&a, &len);
+
+   VG_(unmap_range)(a, len);
+
    /*
    VG_(printf)("MUNMAP: correct (%p for %d) to (%p for %d) %s\n", 
       orig_a, orig_len, a, len, (orig_a!=start || orig_len!=length) 
                                     ? "CHANGE" : "");
    */
 
-   /* Invalidate translations as necessary (also discarding any basic
-      block-specific info retained by the skin) and unload any debug
-      symbols. */
-   // This doesn't handle partial unmapping of exe segs correctly, if that
-   // ever happens...
-   VG_(remove_if_exeseg) ( a, len );
-
    VG_TRACK( die_mem_munmap, a, len );
 }
 
@@ -185,6 +193,8 @@
 {
    Bool rr, ww, xx;
 
+   VG_(mprotect_range)(a, len, prot);
+
    rr = prot & PROT_READ;
    ww = prot & PROT_WRITE;
    xx = prot & PROT_EXEC;
@@ -280,8 +290,7 @@
    doesn't exist, we just return NULL.  Otherwise, we return a pointer
    to the file name, which the caller is responsible for freeing. */
 
-static
-Char *resolve_fname(Int fd)
+Char *VG_(resolve_filename)(Int fd)
 {
    char tmp[28], buf[PATH_MAX];
 
@@ -535,7 +544,7 @@
 
          if(fno != f)
             if(VG_(clo_track_fds))
-               record_fd_open(-1, fno, resolve_fname(fno));
+               record_fd_open(-1, fno, VG_(resolve_filename)(fno));
       }
 
       VG_(lseek)(f, d.d_off, VKI_SEEK_SET);
@@ -643,7 +652,7 @@
 
          for (i = 0; i < fdc; i++)
             if(VG_(clo_track_fds))
-               record_fd_open (tid, fds[i], resolve_fname(fds[i]));
+               record_fd_open (tid, fds[i], VG_(resolve_filename)(fds[i]));
       }
 
       cm = CMSG_NXTHDR(msg, cm);
@@ -739,11 +748,11 @@
 void buf_and_len_pre_check( ThreadId tid, Addr buf_p, Addr buflen_p,
                             Char* buf_s, Char* buflen_s )
 {
-   if (VG_(track_events).pre_mem_write) {
+   if (VG_(defined_pre_mem_write)()) {
       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
       if (buflen_in > 0) {
-         VG_(track_events).pre_mem_write ( Vg_CoreSysCall,
-					   tid, buf_s, buf_p, buflen_in );
+         SK_(pre_mem_write) ( Vg_CoreSysCall,
+			      tid, buf_s, buf_p, buflen_in );
       }
    }
 }
@@ -752,10 +761,10 @@
 void buf_and_len_post_check( ThreadId tid, Int res,
                              Addr buf_p, Addr buflen_p, Char* s )
 {
-   if (!VG_(is_kerror)(res) && VG_(track_events).post_mem_write) {
+   if (!VG_(is_kerror)(res) && VG_(defined_post_mem_write)()) {
       UInt buflen_out = deref_UInt( tid, buflen_p, s);
       if (buflen_out > 0 && buf_p != (Addr)NULL) {
-         VG_(track_events).post_mem_write ( buf_p, buflen_out );
+         SK_(post_mem_write) ( buf_p, buflen_out );
       }
    }
 }
@@ -764,18 +773,110 @@
    Data seg end, for brk()
    ------------------------------------------------------------------ */
 
-/* Records the current end of the data segment so we can make sense of
-   calls to brk(). */
-static
-Addr curr_dataseg_end;
-
-void VG_(init_dataseg_end_for_brk) ( void )
+static Addr do_brk(Addr newbrk)
 {
-   curr_dataseg_end = (Addr)VG_(brk)(0);
-   if (curr_dataseg_end == (Addr)(-1))
-      VG_(core_panic)("can't determine data-seg end for brk()");
+   Addr ret = VG_(brk_limit);
+   static const Bool debug = False;
+   Segment *seg;
+
+   if (debug)
+      VG_(printf)("do_brk: brk_base=%p brk_limit=%p newbrk=%p\n",
+		  VG_(brk_base), VG_(brk_limit), newbrk);
+
+   if (newbrk < VG_(brk_base) || newbrk >= VG_(client_end))
+      return VG_(brk_limit);
+
+   /* brk isn't allowed to grow over anything else */
+   seg = VG_(find_segment)(VG_(brk_limit));
+
+   vg_assert(seg != NULL);
+
    if (0)
-      VG_(printf)("DS END is %p\n", (void*)curr_dataseg_end);
+      VG_(printf)("brk_limit=%p seg->addr=%p seg->end=%p\n", 
+		  VG_(brk_limit), seg->addr, seg->addr+seg->len);
+   vg_assert(VG_(brk_limit) >= seg->addr && VG_(brk_limit) <= (seg->addr + seg->len));
+
+   seg = VG_(next_segment)(seg);
+   if (seg != NULL && newbrk > seg->addr)
+      return VG_(brk_limit);
+
+   if (PGROUNDDN(newbrk) != PGROUNDDN(VG_(brk_limit))) {
+      Addr current = PGROUNDUP(VG_(brk_limit));
+      Addr newaddr = PGROUNDUP(newbrk);
+
+      /* new brk in a new page - fix the mappings */
+      if (newbrk > VG_(brk_limit)) {
+	 
+	 if (debug)
+	    VG_(printf)("  extending brk: current=%p newaddr=%p delta=%d\n",
+			current, newaddr, newaddr-current);
+
+	 if (newaddr == current) {
+	    ret = newbrk;
+	 } else if (VG_(mmap)((void *)current , newaddr-current,
+			      VKI_PROT_READ | VKI_PROT_WRITE | VKI_PROT_EXEC,
+			      VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS | VKI_MAP_FIXED | VKI_MAP_CLIENT,
+			      -1, 0) >= 0) {
+	    VG_(map_segment)(current, newaddr-current, VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC,
+			     SF_FIXED|SF_BRK);
+	    ret = newbrk;
+	 }
+      } else {
+	 vg_assert(newbrk < VG_(brk_limit));
+
+	 if (debug)
+	    VG_(printf)("  shrinking brk: current=%p newaddr=%p delta=%d\n",
+			current, newaddr, current-newaddr);
+
+	 if (newaddr != current) {
+	    VG_(munmap)((void *)newaddr, current - newaddr);
+	    VG_(unmap_range)(newaddr, current-newaddr);
+	 }
+	 ret = newbrk;
+      }
+   } else
+      ret = newbrk;
+
+   VG_(brk_limit) = ret;
+
+   return ret;
+}
+
+
+/* return true if address range entirely contained within client
+   address space */
+static Bool valid_client_addr(Addr start, UInt size, ThreadId tid, const Char *syscall)
+{
+   Addr end = start+size;
+   Addr cl_base = VG_(client_base);
+   Bool ret;
+
+   if (size == 0)
+      return True;
+
+   if (cl_base < 0x10000)
+      cl_base = 0x10000;
+
+   ret =
+      (end >= start) && 
+      start >= cl_base && start < VG_(client_end) &&
+      (end <= VG_(client_end));
+
+   if (0)
+      VG_(printf)("%s: test=%p-%p client=%p-%p ret=%d\n",
+		  syscall, start, end, cl_base, VG_(client_end), ret);
+
+   if (!ret && syscall != NULL) {
+      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried to modify addresses %p-%p",
+		   syscall, start, end);
+
+      if (VG_(clo_verbosity) > 1) {
+	 ExeContext *ec = VG_(get_ExeContext)(tid);
+	 VG_(pp_ExeContext)(ec);
+      }
+   }
+
+   return ret;
 }
 
 /* ---------------------------------------------------------------------
@@ -813,7 +914,6 @@
 #define POST(x)	\
 	static void after_##x(ThreadId tid, ThreadState *tst)
 
-#define STR(x)	#x
 #define PREALIAS(new, old)	\
 	PRE(new) __attribute__((alias(STR(before_##old))))
 #define POSTALIAS(new, old)	\
@@ -1563,21 +1663,107 @@
       its own new thread.) */
    VG_(nuke_all_threads_except)( VG_INVALID_THREADID );
 
-   /* Make any binding for LD_PRELOAD disappear, so that child
-      processes don't get traced into. */
    if (!VG_(clo_trace_children)) {
+      /* Make the LD_LIBRARY_PATH/LD_PRELOAD disappear so that the
+	 child doesn't get our libpthread and other stuff */
       Int i;
       Char** envp = (Char**)arg3;
       Char*  ld_preload_str = NULL;
       Char*  ld_library_path_str = NULL;
-      for (i = 0; envp[i] != NULL; i++) {
-	 if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
-	    ld_preload_str = &envp[i][11];
-	 if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
-	    ld_library_path_str = &envp[i][16];
+
+      if (envp != NULL) {
+	 Char *buf;
+
+	 for (i = 0; envp[i] != NULL; i++) {
+	    if (VG_(strncmp)(envp[i], "LD_PRELOAD=", 11) == 0)
+	       ld_preload_str = &envp[i][11];
+	    if (VG_(strncmp)(envp[i], "LD_LIBRARY_PATH=", 16) == 0)
+	       ld_library_path_str = &envp[i][16];
+	 }
+
+	 buf = VG_(arena_malloc)(VG_AR_CORE, VG_(strlen)(VG_(libdir)) + 20);
+
+	 VG_(sprintf)(buf, "%s*/vg_inject.so", VG_(libdir));
+	 VG_(mash_colon_env)(ld_preload_str, buf);
+
+	 VG_(sprintf)(buf, "%s*", VG_(libdir));
+	 VG_(mash_colon_env)(ld_library_path_str, buf);
+
+	 VG_(env_unsetenv)(envp, VALGRINDCLO);
+
+	 /* XXX if variable becomes empty, remove it completely? */
       }
-      VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)(
-	 ld_preload_str, ld_library_path_str );
+   } else {
+      /* If we're tracing the children, then we need to start it
+	 with our starter+arguments.
+      */
+      Int i;
+      Char *exec = (Char *)arg1;
+      Char **env = (Char **)arg3;
+      Char *cp;
+      Char *exename;
+      Bool sawexec = False;
+      Char *optvar;
+      Int  optlen;
+
+      optlen = 1;
+      for(i = 0; i < VG_(vg_argc); i++)
+	 optlen += VG_(strlen)(VG_(vg_argv)[i]) + 1;
+
+      /* All these are leaked - we're either going to exec, or panic
+	 when we fail. */
+      exename  = VG_(arena_malloc)(VG_AR_CORE, 64);
+      exec = VG_(arena_malloc)(VG_AR_CORE, VG_(strlen)(exec) + 7 /* --exec= */ + 1 /* \0 */);
+
+      VG_(sprintf)(exec, "--exec=%s", (Char *)arg1);
+      VG_(sprintf)(exename, "/proc/self/fd/%d", VG_(execfd));
+
+      optlen += VG_(strlen)(exec)+1;
+
+      optvar = VG_(arena_malloc)(VG_AR_CORE, optlen);
+
+      /* valgrind arguments */
+      cp = optvar;
+      
+      for(i = 1; i < VG_(vg_argc); i++) {
+	 Char *arg = VG_(vg_argv)[i];
+	 Int len;
+	 
+	 if (VG_(memcmp)(arg, "--exec=", 7) == 0) {
+	    /* replace existing --exec= arg */
+	    sawexec = True;
+	    arg = exec;
+	 } else if (VG_(strcmp)(VG_(vg_argv)[i], "--") == 0)
+	    break;
+
+	 len = VG_(strlen)(arg);
+	 VG_(memcpy)(cp, arg, len);
+	 cp += len;
+	 *cp++ = '\01';
+      }
+
+      if (!sawexec) {
+	 Int execlen = VG_(strlen)(exec);
+	 VG_(memcpy)(cp, exec, execlen);
+	 cp += execlen;
+	 *cp++ = '\01';
+      }
+      *cp = '\0';
+
+      VG_(env_setenv)(&env, VALGRINDCLO, optvar);
+
+      arg1 = (UInt)exename;
+      arg3 = (UInt)env;
+   }
+
+   if (0) {
+      Char **cpp;
+
+      VG_(printf)("exec: %s\n", (Char *)arg1);
+      for(cpp = (Char **)arg2; cpp && *cpp; cpp++)
+	 VG_(printf)("argv: %s\n", *cpp);
+      for(cpp = (Char **)arg3; cpp && *cpp; cpp++)
+	 VG_(printf)("env: %s\n", *cpp);
    }
 
    res = VG_(do_syscall)(__NR_execve, arg1, arg2, arg3);
@@ -1605,6 +1791,8 @@
 
 PRE(brk)
 {
+   Addr brk_limit = VG_(brk_limit);
+
    /* libc   says: int   brk(void *end_data_segment);
       kernel says: void* brk(void* end_data_segment);  (more or less)
 
@@ -1621,28 +1809,26 @@
       Both will seg fault if you shrink it back into a text segment.
    */
    MAYBE_PRINTF("brk ( %p ) --> ",arg1);
-}
 
-POST(brk)
-{
+   res = do_brk(arg1);
+
    MAYBE_PRINTF("0x%x\n", res);
 
    if (res == arg1) {
       /* brk() succeeded */
-      if (res < curr_dataseg_end) {
+      if (res < brk_limit) {
          /* successfully shrunk the data segment. */
          VG_TRACK( die_mem_brk, (Addr)arg1,
-		   curr_dataseg_end-arg1 );
+		   brk_limit-arg1 );
       } else
-      if (res > curr_dataseg_end && res != 0) {
+      if (res > brk_limit) {
          /* successfully grew the data segment */
-         VG_TRACK( new_mem_brk, curr_dataseg_end,
-                                arg1-curr_dataseg_end );
+         VG_TRACK( new_mem_brk, brk_limit,
+                                arg1-brk_limit );
       }
-      curr_dataseg_end = res;
    } else {
       /* brk() failed */
-      vg_assert(curr_dataseg_end == res);
+      vg_assert(brk_limit == res);
    }
 }
 
@@ -1699,7 +1885,7 @@
       res = -VKI_EMFILE;
    } else {
       if(VG_(clo_track_fds))
-         record_fd_open(tid, res, resolve_fname(res));
+         record_fd_open(tid, res, VG_(resolve_filename)(res));
    }
 }
 
@@ -1717,7 +1903,7 @@
 		VG_(getpid)(), 
 		arg1, arg2, res);
    if(VG_(clo_track_fds))
-      record_fd_open(tid, res, resolve_fname(res));
+      record_fd_open(tid, res, VG_(resolve_filename)(res));
 }
 
 PRE(fcntl)
@@ -1730,7 +1916,7 @@
 {
    if (arg2 == VKI_F_DUPFD)
       if(VG_(clo_track_fds))
-         record_fd_open(tid, res, resolve_fname(res));
+         record_fd_open(tid, res, VG_(resolve_filename)(res));
 }
 
 PRE(fchdir)
@@ -1763,7 +1949,7 @@
 {
    if (arg2 == VKI_F_DUPFD)
       if(VG_(clo_track_fds))
-         record_fd_open(tid, res, resolve_fname(res));
+         record_fd_open(tid, res, VG_(resolve_filename)(res));
 }
 
 PRE(fstat)
@@ -2176,6 +2362,15 @@
    }
    case 21: /* IPCOP_shmat */
    {
+      UInt shmid = arg2;
+      UInt segmentSize = get_shm_size ( shmid );
+      
+      /* If they didn't ask for a particular address, then place it
+	 like an mmap. */
+      if (arg5 == 0)
+	 arg5 = VG_(find_map_space)(0, segmentSize, True);
+      else if (!valid_client_addr(arg5, segmentSize, tid, "shmat"))
+	 res = -VKI_EINVAL;
       break;
    }
    case 22: /* IPCOP_shmdt */
@@ -2287,10 +2482,9 @@
    case 21: /* IPCOP_shmat */
    {
       Int shmid = arg2;
-      /*Int shmflag = arg3;*/
+      Int shmflag = arg3;
       Addr addr;
 
-                  
       /* force readability. before the syscall it is
        * indeed uninitialized, as can be seen in
        * glibc/sysdeps/unix/sysv/linux/shmat.c */
@@ -2300,21 +2494,36 @@
       if ( addr > 0 ) { 
 	 UInt segmentSize = get_shm_size ( shmid );
 	 if ( segmentSize > 0 ) {
+	    UInt prot;
 	    /* we don't distinguish whether it's read-only or
 	     * read-write -- it doesn't matter really. */
 	    VG_TRACK( new_mem_mmap, addr, segmentSize, 
 		      True, True, False );
+
+	    prot = VKI_PROT_READ|VKI_PROT_WRITE;
+	    if (!(shmflag & 010000)) /* = SHM_RDONLY */
+	       prot &= ~VKI_PROT_WRITE;
+	    VG_(map_segment)(addr, segmentSize, prot,
+			     SF_SHARED | SF_SHM);
 	 }
       }
       break;
    }
    case 22: /* IPCOP_shmdt */
+   {
       /* ### FIXME: this should call make_noaccess on the
        * area passed to shmdt. But there's no way to
        * figure out the size of the shared memory segment
        * just from the address...  Maybe we want to keep a
        * copy of the exiting mappings inside valgrind? */
+      Segment *s = VG_(find_segment)(arg1);
+
+      if (s->addr == arg1 && (s->flags & SF_SHM)) {
+	 VG_TRACK( die_mem_munmap, s->addr, s->len );
+	 VG_(unmap_range)(s->addr, s->len);
+      }
       break;
+   }
    case 23: /* IPCOP_shmget */
       break;
    case 24: /* IPCOP_shmctl */
@@ -3173,7 +3382,7 @@
 PRE(lstat)
 {
    /* int lstat(const char *file_name, struct stat *buf); */
-   MAYBE_PRINTF("lstat ( %p, %p )\n",arg1,arg2);
+   MAYBE_PRINTF("lstat ( %p \"%s\", %p )\n",arg1,arg1,arg2);
    SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat(file_name)", arg1 );
    SYSCALL_TRACK( pre_mem_write, tid, "lstat(buf)", arg2, 
 		  sizeof(struct stat) );
@@ -3189,7 +3398,7 @@
 PRE(lstat64)
 {
    /* int lstat64(const char *file_name, struct stat64 *buf); */
-   MAYBE_PRINTF("lstat64 ( %p, %p )\n",arg1,arg2);
+   MAYBE_PRINTF("lstat64 ( %p \"%s\", %p )\n",arg1,arg1,arg2);
    SYSCALL_TRACK( pre_mem_read_asciiz, tid, "lstat64(file_name)", arg1 );
    SYSCALL_TRACK( pre_mem_write, tid, "lstat64(buf)", arg2, 
 		  sizeof(struct stat64) );
@@ -3209,31 +3418,40 @@
    SYSCALL_TRACK( pre_mem_read_asciiz, tid, "mkdir(pathname)", arg1 );
 }
 
-void check_mmap_start(ThreadState* tst, Addr start, Int flags)
-{
-   /* Refuse to mmap the first 64KB of memory, so that the cheap sanity test 
-      for tools using shadow memory works. */
-   if (start < 65536 && (flags & VKI_MAP_FIXED))
-      tst->m_eax = -VKI_EINVAL;
-}
-
 PRE(mmap2)
 {
    /* My impression is that this is exactly like __NR_mmap 
       except that all 6 args are passed in regs, rather than in 
-      a memory-block. */
+      a memory-block.
+      
+      Almost.  The big difference is that the file offset is specified
+      in pagesize units rather than bytes, so that it can be used for
+      files bigger than 2^32 bytes. - JSGF
+   */
    /* void* mmap(void *start, size_t length, int prot, 
       int flags, int fd, off_t offset); 
    */
    MAYBE_PRINTF("mmap2 ( %p, %d, %d, %d, %d, %d )\n",
 		arg1, arg2, arg3, arg4, arg5, arg6 );
 
-   check_mmap_start(tst, arg1, arg4);
+   if (arg4 & VKI_MAP_FIXED) {
+      if (!valid_client_addr(arg1, arg2, tid, "mmap2"))
+	 res = -VKI_ENOMEM;
+   } else {
+      arg1 = VG_(find_map_space)(arg1, arg2, True);
+      arg4 |= VKI_MAP_FIXED;
+      if (arg1 == 0)
+	 res = -VKI_ENOMEM;
+   }
 }
 
 POST(mmap2)
 {
-   mmap_segment( (Addr)res, arg2, arg3, arg5 );
+   if (!valid_client_addr(res, arg2, tid, "mmap2")) {
+      VG_(munmap)((void *)res, arg2);
+      res = -VKI_ENOMEM;
+   } else
+      mmap_segment( (Addr)res, arg2, arg3, arg4, arg5, arg6 * (ULong)VKI_BYTES_PER_PAGE );
 }
 
 PRE(mmap)
@@ -3256,19 +3474,30 @@
    MAYBE_PRINTF("mmap ( %p, %d, %d, %d, %d, %d )\n",
 		a1, a2, a3, a4, a5, a6 );
 
-   check_mmap_start(tst, a1, a4);
-}
+   if (a4 & VKI_MAP_FIXED) {
+      if (!valid_client_addr(a1, a2, tid, "mmap")) {
+	 MAYBE_PRINTF("mmap failing: %p-%p\n", a1, a1+a2);
+	 res = -VKI_ENOMEM;
+      }
+   } else {
+      a1 = VG_(find_map_space)(arg_block[0], arg_block[1], True);
+      if (a1 == 0)
+	 res = -VKI_ENOMEM;
+      else
+	 a4 |= VKI_MAP_FIXED;
+   }
 
-POST(mmap)
-{
-   UInt* arg_block = (UInt*)arg1;
-   UInt a2, a3, a5;
-
-   a2 = arg_block[1];
-   a3 = arg_block[2];
-   a5 = arg_block[4];
-
-   mmap_segment( (Addr)res, a2, a3, a5 );
+   if (res != -VKI_ENOMEM) {
+      res = (Int)VG_(mmap)((void *)a1, a2, a3, a4 | VKI_MAP_CLIENT, a5, a6);
+      
+      if (res == -1)
+	 res = -VKI_ENOMEM;
+      else if (!valid_client_addr(res, a2, tid, "mmap")) {
+	 VG_(munmap)((void *)res, a2);
+	 res = -VKI_ENOMEM;
+      } else
+	 mmap_segment( (Addr)res, a2, a3, a4, a5, a6 );
+   }
 }
 
 PRE(mprotect)
@@ -3276,6 +3505,9 @@
    /* int mprotect(const void *addr, size_t len, int prot); */
    /* should addr .. addr+len-1 be checked before the call? */
    MAYBE_PRINTF("mprotect ( %p, %d, %d )\n", arg1,arg2,arg3);
+
+   if (!valid_client_addr(arg1, arg2, tid, "mprotect"))
+      res = -VKI_ENOMEM;
 }
 
 POST(mprotect)
@@ -3288,6 +3520,9 @@
    /* int munmap(void *start, size_t length); */
    /* should start .. start+length-1 be checked before the call? */
    MAYBE_PRINTF("munmap ( %p, %d )\n", arg1,arg2);
+
+   if (!valid_client_addr(arg1, arg2, tid, "munmap"))
+      res = -VKI_EINVAL;
 }
 
 POST(munmap)
@@ -4174,7 +4409,7 @@
 PRE(truncate)
 {
    /* int truncate(const char *path, size_t length); */
-   MAYBE_PRINTF("truncate ( %p, %d )\n", arg1,arg2);
+   MAYBE_PRINTF("truncate ( %p \"%s\", %d )\n", arg1,arg1,arg2);
    SYSCALL_TRACK( pre_mem_read_asciiz, tid, "truncate(path)", arg1 );
 }
 
@@ -4187,7 +4422,7 @@
 PRE(unlink)
 {
    /* int unlink(const char *pathname) */
-   MAYBE_PRINTF("ulink ( %p )\n",arg1);
+   MAYBE_PRINTF("unlink ( %p \"%s\" )\n",arg1, arg1);
    SYSCALL_TRACK( pre_mem_read_asciiz, tid, "unlink(pathname)", arg1 );
 }
 
@@ -4511,6 +4746,10 @@
 {
    VG_(message)
       (Vg_DebugMsg,"WARNING: unhandled syscall: %d", tst->m_eax);
+   if (VG_(clo_verbosity) > 1) {
+      ExeContext *ec = VG_(get_ExeContext)(tid);
+      VG_(pp_ExeContext)(ec);
+   }
    VG_(message)
       (Vg_DebugMsg,"Do not panic.  You may be able to fix this easily.");
    VG_(message)
@@ -4534,6 +4773,8 @@
    SYSB_(modify_ldt,		False),
 
    SYSB_(execve,		False),
+   SYSB_(brk,			False),
+   SYSB_(mmap,			False),
 
 #if SIGNAL_SIMULATION
    SYSBA(sigaltstack,		False),
@@ -4620,7 +4861,6 @@
    SYSBA(capget,		False),
    SYSB_(capset,		False),
    SYSB_(access,		False),
-   SYSBA(brk,			False),
    SYSB_(chdir,			False),
    SYSB_(chmod,			False),
    SYSB_(chown32,		False),
@@ -4674,8 +4914,6 @@
    SYSBA(lstat,			False),
    SYSBA(lstat64,		False),
    SYSB_(mkdir,			True),
-   SYSBA(mmap2,			False),
-   SYSBA(mmap,			False),
    SYSBA(mprotect,		False),
    SYSBA(munmap,		False),
    SYSBA(nanosleep,		True),
@@ -4729,6 +4967,7 @@
    SYSB_(writev,		True),
    SYSB_(prctl,			True),
    SYSBA(adjtimex,		False),
+   SYSBA(mmap2,			False),
    SYSBA(clock_gettime,         False),
 
    /* new signal handling makes these normal blocking syscalls */
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index c5125b3..7551dcb 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -6536,7 +6536,8 @@
          if (eip - eip0 > 2000 && !isEnd) {
             if (VG_(clo_verbosity) > 2)
                VG_(message)(Vg_DebugMsg,
-                  "Warning: splitting giant basic block into pieces");
+			    "Warning: splitting giant basic block into pieces at %p %(y",
+			    eip, eip);
             uInstr1(cb, JMP, 0, Literal, 0);
             uLiteral(cb, eip);
             uCond(cb, CondAlways);
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index d33e0db..42fafed 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -1842,22 +1842,22 @@
 
       } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
 
-#           define DO_GENERIC                                                 \
-               if (VG_(track_events).new_mem_stack ||                         \
-                   VG_(track_events).die_mem_stack) {                         \
-                  uInstr1(cb, CCALL, 0, TempReg, u->val1);                    \
-                  uCCall(cb, (Addr) VG_(unknown_esp_update),                  \
-                         1, 1, False);                                        \
+#           define DO_GENERIC					\
+               if (VG_(defined_new_mem_stack)() ||		\
+                   VG_(defined_die_mem_stack)()) {		\
+                  uInstr1(cb, CCALL, 0, TempReg, u->val1);	\
+                  uCCall(cb, (Addr) VG_(unknown_esp_update),	\
+                         1, 1, False);				\
                } 
 
-#           define DO(kind, size)                                             \
-               if (VG_(track_events).kind##_mem_stack_##size) {               \
-                  uInstr1(cb, CCALL, 0, TempReg, u->val1);                    \
-                  uCCall(cb, (Addr) VG_(track_events).kind##_mem_stack_##size,\
-                         1, 1, False);                                        \
-                                                                              \
-               } else                                                         \
-                  DO_GENERIC                                                  \
+#           define DO(kind, size)								\
+               if (VG_(defined_##kind##_mem_stack_##size)()) {					\
+                  uInstr1(cb, CCALL, 0, TempReg, u->val1);					\
+                  uCCall(cb, (Addr) VG_(tool_interface).track_##kind##_mem_stack_##size,	\
+                         1, 1, False);								\
+												\
+               } else										\
+                  DO_GENERIC									\
                break
 
          if (u->val1 == t_ESP) {
@@ -2354,12 +2354,14 @@
                       /*OUT*/ UInt* trans_size,
 		      /*OUT*/ UShort jumps[VG_MAX_JUMPS])
 {
-   Int         n_disassembled_bytes, final_code_size, i;
+   Int         n_disassembled_bytes, final_code_size;
    Bool        debugging_translation;
    UChar*      final_code;
    UCodeBlock* cb;
    Bool        notrace_until_done;
    UInt        notrace_until_limit = 0;
+   Segment     *seg;
+   Addr		redir;
 
    VGP_PUSHCC(VgpTranslate);
    debugging_translation
@@ -2367,17 +2369,14 @@
 
    /* Look in the code redirect table to see if we should
       translate an alternative address for orig_addr. */
-   for (i = 0; VG_(code_redirect_table)[i].entry_pt_orig != 0; i++) {
-      if (orig_addr == VG_(code_redirect_table)[i].entry_pt_orig) {
-         if (VG_(clo_verbosity) >= 2)
-            VG_(message)(Vg_UserMsg, 
-               "TRANSLATE: %p redirected to %p",
-               orig_addr, 
-               VG_(code_redirect_table)[i].entry_pt_subst );
-         orig_addr = VG_(code_redirect_table)[i].entry_pt_subst;
-         break;
-      }
-   }
+   redir = VG_(code_redirect)(orig_addr);
+
+   if (redir != orig_addr && VG_(clo_verbosity) >= 2)
+      VG_(message)(Vg_UserMsg, 
+		   "TRANSLATE: %p redirected to %p",
+		   orig_addr, 
+		   redir );
+   orig_addr = redir;
 
    /* If codegen tracing, don't start tracing until
       notrace_until_limit blocks have gone by.  This avoids printing
@@ -2388,20 +2387,32 @@
    notrace_until_done
       = VG_(overall_in_count) > notrace_until_limit;
 
+   seg = VG_(find_segment)(orig_addr);
+
    if (!debugging_translation)
       VG_TRACK( pre_mem_read, Vg_CoreTranslate, tid, "", orig_addr, 1 );
 
-   if (!VG_(is_addressable)(orig_addr, 1)) {
-      /* Code address is bad - deliver a signal instead */
+   if (seg == NULL ||
+       !VG_(seg_contains)(seg, orig_addr, 1) || 
+       (seg->prot & (VKI_PROT_READ|VKI_PROT_EXEC)) == 0) {
       vki_ksiginfo_t info;
 
+      /* Code address is bad - deliver a signal instead */
+      vg_assert(!VG_(is_addressable)(orig_addr, 1));
+
       info.si_signo = VKI_SIGSEGV;
-      info.si_code = 1;		/* address not mapped to object */
+
+      if (seg != NULL && VG_(seg_contains)(seg, orig_addr, 1)) {
+	 vg_assert((seg->prot & VKI_PROT_EXEC) == 0);
+	 info.si_code = 2;	/* invalid permissions for mapped object */
+      } else
+	 info.si_code = 1;	/* address not mapped to object */
       info._sifields._sigfault._addr = (void*)orig_addr;
 
       VG_(deliver_signal)(tid, &info, False);
       return;
-   }
+   } else
+      seg->flags |= SF_CODE;	/* contains cached code */
 
    cb = VG_(alloc_UCodeBlock)();
    cb->orig_eip = orig_addr;
diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c
index c956820..2c0b13e 100644
--- a/coregrind/vg_transtab.c
+++ b/coregrind/vg_transtab.c
@@ -378,8 +378,22 @@
    }
    for (s = 0; s < VG_TC_N_SECTORS; s++) {
       if (vg_tc[s] == NULL) {
+#if 1
          vg_tc[s] = VG_(get_memory_from_mmap) 
                        ( vg_tc_sector_szB, "trans-cache(sector)" );
+#else
+	 Char buf[20];
+	 static Int count = 0;
+	 Int fd;
+	 
+	 VG_(sprintf)(buf, ".transtab.%d", count++);
+
+	 fd = VG_(open)(buf, VKI_O_RDWR|VKI_O_CREAT|VKI_O_TRUNC, 0700);
+	 //VG_(unlink)(buf);
+	 VG_(do_syscall)(__NR_ftruncate, fd, PGROUNDUP(vg_tc_sector_szB));
+	 vg_tc[s] = VG_(mmap)(0, PGROUNDUP(vg_tc_sector_szB), VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, VKI_MAP_SHARED, fd, 0);
+	 VG_(close)(fd);
+#endif
          vg_tc_used[s] = 0;
          VG_(sprintf)(msg, "after  allocation of sector %d "
                            "(size %d)", 
diff --git a/example/ex_main.c b/example/ex_main.c
index 5f103bc..92debb7 100644
--- a/example/ex_main.c
+++ b/example/ex_main.c
@@ -5,9 +5,7 @@
 
 #include "vg_skin.h"
 
-VG_DETERMINE_INTERFACE_VERSION
-
-void SK_(pre_clo_init)()
+static void SK_(pre_clo_init)()
 {
    VG_(details_name)            ("Example");
    VG_(details_version)         ("0.0.1");
@@ -32,6 +30,9 @@
 {
 }
 
+/* Does not use shadow memory */
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                ex_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am
index c31517a..f9df724 100644
--- a/helgrind/Makefile.am
+++ b/helgrind/Makefile.am
@@ -6,13 +6,24 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
-val_PROGRAMS = vgskin_helgrind.so
+val_PROGRAMS = vgskin_helgrind.so vgpreload_helgrind.so
 
 vgskin_helgrind_so_SOURCES = hg_main.c
 vgskin_helgrind_so_LDFLAGS = -shared
 vgskin_helgrind_so_LDADD = ../coregrind/vg_replace_malloc.o
 
+vgpreload_helgrind_so_SOURCES = 
+vgpreload_helgrind_so_LDADD = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_helgrind_so_DEPENDENCIES = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_helgrind_so_LDFLAGS = -shared -Wl,-z,interpose,-z,initfirst
+
 hgincludedir = $(includedir)/valgrind
 
 hginclude_HEADERS = helgrind.h
+
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(addprefix $(inplacedir)/,$(val_PROGRAMS))
+	ln -f -s $(addprefix $(top_srcdir)/$(subdir)/,$(val_PROGRAMS)) $(inplacedir)
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index cf0fe32..4982e68 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -32,8 +32,6 @@
 #include "vg_skin.h"
 #include "helgrind.h"
 
-VG_DETERMINE_INTERFACE_VERSION
-
 static UInt n_eraser_warnings = 0;
 static UInt n_lockorder_warnings = 0;
 
@@ -498,7 +496,7 @@
       although this isn't important, so the following assert is
       spurious. (SSS: not true for ESecMaps -- they're 16 pages) */
    sk_assert(0 == (sizeof(ESecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(ESecMap), caller );
+   map = (ESecMap *)VG_(shadow_alloc)(sizeof(ESecMap));
 
    for (i = 0; i < ESEC_MAP_WORDS; i++)
       map->swords[i] = virgin_sword;
@@ -3246,34 +3244,35 @@
    VG_(needs_data_syms)();
    VG_(needs_client_requests)();
    VG_(needs_command_line_options)();
+   VG_(needs_shadow_memory)();
 
-   VG_(track_new_mem_startup)      (& eraser_new_mem_startup);
+   VG_(init_new_mem_startup)      (& eraser_new_mem_startup);
 
    /* stack ones not decided until VG_(post_clo_init)() */
 
-   VG_(track_new_mem_brk)          (& make_writable);
-   VG_(track_new_mem_mmap)         (& eraser_new_mem_startup);
+   VG_(init_new_mem_brk)          (& make_writable);
+   VG_(init_new_mem_mmap)         (& eraser_new_mem_startup);
 
-   VG_(track_change_mem_mprotect)  (& eraser_set_perms);
+   VG_(init_change_mem_mprotect)  (& eraser_set_perms);
 
-   VG_(track_ban_mem_stack)        (NULL);
+   VG_(init_ban_mem_stack)        (NULL);
 
-   VG_(track_die_mem_stack)        (NULL);
-   VG_(track_die_mem_stack_signal) (NULL);
-   VG_(track_die_mem_brk)          (NULL);
-   VG_(track_die_mem_munmap)       (NULL);
+   VG_(init_die_mem_stack)        (NULL);
+   VG_(init_die_mem_stack_signal) (NULL);
+   VG_(init_die_mem_brk)          (NULL);
+   VG_(init_die_mem_munmap)       (NULL);
 
-   VG_(track_pre_mem_read)         (& eraser_pre_mem_read);
-   VG_(track_pre_mem_read_asciiz)  (& eraser_pre_mem_read_asciiz);
-   VG_(track_pre_mem_write)        (& eraser_pre_mem_write);
-   VG_(track_post_mem_write)       (NULL);
+   VG_(init_pre_mem_read)         (& eraser_pre_mem_read);
+   VG_(init_pre_mem_read_asciiz)  (& eraser_pre_mem_read_asciiz);
+   VG_(init_pre_mem_write)        (& eraser_pre_mem_write);
+   VG_(init_post_mem_write)       (NULL);
 
-   VG_(track_post_thread_create)   (& hg_thread_create);
-   VG_(track_post_thread_join)     (& hg_thread_join);
+   VG_(init_post_thread_create)   (& hg_thread_create);
+   VG_(init_post_thread_join)     (& hg_thread_join);
 
-   VG_(track_pre_mutex_lock)       (& eraser_pre_mutex_lock);
-   VG_(track_post_mutex_lock)      (& eraser_post_mutex_lock);
-   VG_(track_post_mutex_unlock)    (& eraser_post_mutex_unlock);
+   VG_(init_pre_mutex_lock)       (& eraser_pre_mutex_lock);
+   VG_(init_post_mutex_lock)      (& eraser_post_mutex_lock);
+   VG_(init_post_mutex_unlock)    (& eraser_post_mutex_unlock);
 
    VG_(register_compact_helper)((Addr) & eraser_mem_help_read_1);
    VG_(register_compact_helper)((Addr) & eraser_mem_help_read_2);
@@ -3392,8 +3391,8 @@
    else
       stack_tracker = & eraser_new_mem_stack;
 
-   VG_(track_new_mem_stack)        (stack_tracker);
-   VG_(track_new_mem_stack_signal) (stack_tracker);
+   VG_(init_new_mem_stack)        (stack_tracker);
+   VG_(init_new_mem_stack_signal) (stack_tracker);
 }
 
 
@@ -3417,6 +3416,9 @@
 		  ((stk_ld+stk_st)*100) / (stk_ld + stk_st + nonstk_ld + nonstk_st));
 }
 
+/* Uses a 1:1 mapping */
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 1.0)
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                hg_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/include/Makefile.am b/include/Makefile.am
index 4470eb0..776ea8e 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -1,5 +1,6 @@
 EXTRA_DIST = \
-	vg_profile.c
+	vg_profile.c \
+	vg_skin.h.base
 
 incincdir = $(includedir)/valgrind
 
@@ -8,3 +9,13 @@
 	vg_constants_skin.h \
 	vg_skin.h \
 	vg_kerneliface.h
+
+BUILT_SOURCES = vg_skin.h
+CLEANFILES = vg_skin.h
+
+vg_skin.h: $(srcdir)/vg_skin.h.base \
+	 $(top_srcdir)/coregrind/gen_toolint.pl $(top_srcdir)/coregrind/toolfuncs.def
+	rm -f $@
+	cat $(srcdir)/vg_skin.h.base > $@
+	$(PERL) $(top_srcdir)/coregrind/gen_toolint.pl toolproto < $(top_srcdir)/coregrind/toolfuncs.def >> $@ || rm -f $@
+	$(PERL) $(top_srcdir)/coregrind/gen_toolint.pl initproto < $(top_srcdir)/coregrind/toolfuncs.def >> $@ || rm -f $@
diff --git a/include/vg_constants_skin.h b/include/vg_constants_skin.h
index 5347821..09110cb 100644
--- a/include/vg_constants_skin.h
+++ b/include/vg_constants_skin.h
@@ -45,10 +45,17 @@
 #define VGP_(str)   VGAPPEND(vgProf_,str)
 #define VGOFF_(str) VGAPPEND(vgOff_,str)
 #define VGR_(str)   VGAPPEND(vgAllRoadsLeadToRome_,str)
+#define VGINJ_(str) VGAPPEND(__vgInject_,str)
 
 /* Skin specific ones.  Note that final name still starts with "vg". */
 #define SK_(str)    VGAPPEND(vgSkin_,str)
 
+/* This is specifically for stringifying VG_(x) function names.  We
+   need to do two macro expansions to get the VG_ macro expanded before
+   stringifying */
+#define _STR(x)	#x
+#define STR(x)	_STR(x)
+
 #endif /* ndef __VG_CONSTANTS_SKIN_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/include/vg_kerneliface.h b/include/vg_kerneliface.h
index 30b16f2..8f2c3b1 100644
--- a/include/vg_kerneliface.h
+++ b/include/vg_kerneliface.h
@@ -305,12 +305,16 @@
 
 /* The following are copied from include/asm-i386/mman.h .*/
 
+#define VKI_PROT_NONE      0x0		   /* No page permissions */
 #define VKI_PROT_READ      0x1             /* Page can be read.  */
 #define VKI_PROT_WRITE     0x2             /* Page can be written.  */
 #define VKI_PROT_EXEC      0x4             /* Page can be executed.  */
 #define VKI_MAP_ANONYMOUS  0x20            /* Don't use a file.  */
+#define VKI_MAP_SHARED	   0x01		   /* Share changes.  */
 #define VKI_MAP_PRIVATE    0x02            /* Changes are private.  */
 #define VKI_MAP_FIXED      0x10            /* Interpret addr exactly */
+#define VKI_MAP_NOSYMS     0x40000000	   /* internal pseudo-flag to disable symbol loading */
+#define VKI_MAP_CLIENT     0x80000000	   /* internal pseudo-flag to distinguish client mappings */
 
 /* Copied from linux-2.4.19/include/asm-i386/fcntl.h */
 
@@ -571,6 +575,7 @@
    Logic from     /usr/src/linux-2.4.9-31/fs/binfmt_elf.c
                   and its counterpart in the 2.2.14 kernel sources 
                   in Red Hat 6.2.  */
+#define VKI_AT_NULL   0
 #define VKI_AT_SYSINFO 32   /* address of system info page */
 #define VKI_AT_CLKTCK 17    /* frequency at which times() increments */
 #define VKI_AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
diff --git a/include/vg_skin.h b/include/vg_skin.h
index 5e74d98..8c328ed 100644
--- a/include/vg_skin.h
+++ b/include/vg_skin.h
@@ -1,4 +1,4 @@
-
+/* -*- c -*- */
 /*--------------------------------------------------------------------*/
 /*--- The only header your skin will ever need to #include...      ---*/
 /*---                                                    vg_skin.h ---*/
@@ -126,14 +126,38 @@
 #define VG_CORE_INTERFACE_MAJOR_VERSION   5
 #define VG_CORE_INTERFACE_MINOR_VERSION   0
 
-extern const Int VG_(skin_interface_major_version);
-extern const Int VG_(skin_interface_minor_version);
+typedef struct _ToolInfo {
+   Int	sizeof_ToolInfo;
+   Int	interface_major_version;
+   Int	interface_minor_version;
+
+   /* Initialise skin.   Must do the following:
+      - initialise the `details' struct, via the VG_(details_*)() functions
+      - register any helpers called by generated code
+      
+      May do the following:
+      - initialise the `needs' struct to indicate certain requirements, via
+      the VG_(needs_*)() functions
+      - initialize all the tool's entrypoints via the VG_(init_*)() functions
+      - register any skin-specific profiling events
+      - any other skin-specific initialisation
+   */
+   void        (*sk_pre_clo_init) ( void );
+
+   /* Specifies how big the shadow segment should be as a ratio to the
+      client address space.  0 for no shadow segment. */
+   float	shadow_ratio;
+} ToolInfo;
 
 /* Every skin must include this macro somewhere, exactly once. */
-#define VG_DETERMINE_INTERFACE_VERSION \
-const Int VG_(skin_interface_major_version) = VG_CORE_INTERFACE_MAJOR_VERSION; \
-const Int VG_(skin_interface_minor_version) = VG_CORE_INTERFACE_MINOR_VERSION;
-
+#define VG_DETERMINE_INTERFACE_VERSION(pre_clo_init, shadow)		\
+   const ToolInfo SK_(tool_info) = {					\
+      .sizeof_ToolInfo         = sizeof(ToolInfo),			\
+      .interface_major_version = VG_CORE_INTERFACE_MAJOR_VERSION,	\
+      .interface_minor_version = VG_CORE_INTERFACE_MINOR_VERSION,	\
+      .sk_pre_clo_init         = pre_clo_init,				\
+      .shadow_ratio	       = shadow,				\
+   };
 
 /*====================================================================*/
 /*=== Command-line options                                         ===*/
@@ -419,7 +443,7 @@
 
 /* Mini-regexp function.  Searches for 'pat' in 'str'.  Supports
  * meta-symbols '*' and '?'.  '\' escapes meta-symbols. */
-extern Bool  VG_(string_match)   ( Char* pat, Char* str );
+extern Bool  VG_(string_match)   ( const Char* pat, const Char* str );
 
 
 /* ------------------------------------------------------------------ */
@@ -476,14 +500,33 @@
 
 
 /* ------------------------------------------------------------------ */
-/* system/mman.h */
-extern void* VG_(mmap)( void* start, UInt length,
-                        UInt prot, UInt flags, UInt fd, UInt offset );
-extern Int  VG_(munmap)( void* start, Int length );
-
 /* Get memory by anonymous mmap. */
 extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who );
 
+extern Bool VG_(is_client_addr) (Addr a);
+extern Addr VG_(get_client_base)(void);
+extern Addr VG_(get_client_end) (void);
+extern Addr VG_(get_client_size)(void);
+
+extern Bool VG_(is_shadow_addr) (Addr a);
+extern Addr VG_(get_shadow_base)(void);
+extern Addr VG_(get_shadow_end) (void);
+extern Addr VG_(get_shadow_size)(void);
+
+extern void *VG_(shadow_alloc)(UInt size);
+
+extern Bool VG_(is_addressable)(Addr p, Int sz);
+
+extern Addr VG_(client_alloc)(Addr base, UInt len, UInt prot, UInt flags);
+extern void VG_(client_free)(Addr addr);
+
+extern Bool VG_(is_valgrind_addr)(Addr a);
+
+/* initialize shadow pages in the range [p, p+sz) This calls
+   init_shadow_page for each one.  It should be a lot more efficient
+   for bulk-initializing shadow pages than faulting on each one. 
+*/
+extern void VG_(init_shadow_range)(Addr p, UInt sz, Bool call_init);
 
 /* ------------------------------------------------------------------ */
 /* signal.h.
@@ -1254,9 +1297,9 @@
 extern void VG_(emit_testb_lit_reg)      ( Bool upd_cc, UInt lit, Int reg );
 
 /* zero-extended load emitters */
-extern void VG_(emit_movzbl_offregmem_reg) ( Int off, Int regmem, Int reg );
-extern void VG_(emit_movzwl_offregmem_reg) ( Int off, Int areg, Int reg );
-extern void VG_(emit_movzwl_regmem_reg)    ( Int reg1, Int reg2 );
+extern void VG_(emit_movzbl_offregmem_reg) ( Bool bounds, Int off, Int regmem, Int reg );
+extern void VG_(emit_movzwl_offregmem_reg) ( Bool bounds, Int off, Int areg, Int reg );
+extern void VG_(emit_movzwl_regmem_reg)    ( Bool bounds, Int reg1, Int reg2 );
 
 /* misc instruction emitters */
 extern void VG_(emit_call_reg)         ( Int reg );
@@ -1557,6 +1600,72 @@
 
 
 /*====================================================================*/
+/*=== A generic skiplist                                           ===*/
+/*====================================================================*/
+
+/* 
+   The idea here is that the skiplist puts its per-element data at the
+   end of the structure.  When you initialize the skiplist, you tell
+   it what structure your list elements are going to be.  Then you
+   should allocate them with VG_(SkipNode_Alloc), which will allocate
+   enough memory for the extra bits.
+ */
+#include <stddef.h>		/* for offsetof */
+
+typedef struct _SkipList SkipList;
+typedef struct _SkipNode SkipNode;
+
+typedef Int (*SkipCmp_t)(const void *key1, const void *key2);
+
+struct _SkipList {
+   const Short		arena;		/* allocation arena                        */
+   const UShort		size;		/* structure size (not including SkipNode) */
+   const UShort		keyoff;		/* key offset                              */
+   const SkipCmp_t	cmp;		/* compare two keys                        */
+	 Char *		(*strkey)(void *); /* stringify a key (for debugging)      */
+         SkipNode	*head;		/* list head                               */
+};
+
+/* Use this macro to initialize your skiplist head.  The arguments are pretty self-explanatory:
+   _type is the type of your element structure
+   _key is the field within that type which you want to use as the key
+   _cmp is the comparison function for keys - it gets two typeof(_key) pointers as args
+   _strkey is a function which can return a string of your key - it's only used for debugging
+   _arena is the arena to use for allocation - -1 is the default
+ */
+#define SKIPLIST_INIT(_type, _key, _cmp, _strkey, _arena)		\
+	{								\
+	   .arena       = _arena,					\
+	   .size	= sizeof(_type),				\
+	   .keyoff	= offsetof(_type, _key),			\
+	   .cmp		= _cmp,						\
+	   .strkey      = _strkey,					\
+	   .head	= NULL,						\
+	}
+
+/* List operations:
+   SkipList_Find searches a list.  If it can't find an exact match, it either returns NULL
+      or a pointer to the element before where k would go
+   SkipList_Insert inserts a new element into the list.  Duplicates are forbidden.
+   SkipList_Remove removes an element from the list and returns it.  It doesn't free the memory.
+ */
+extern void *VG_(SkipList_Find)  (const SkipList *l, void *key);
+extern void  VG_(SkipList_Insert)(      SkipList *l, void *data);
+extern void *VG_(SkipList_Remove)(      SkipList *l, void *key);
+
+/* Node (element) operations:
+   SkipNode_Alloc: allocate memory for a new element on the list
+   SkipNode_Free: free memory allocated above
+   SkipNode_First: return the first element on the list
+   SkipNode_Next: return the next element after "data" on the list - 
+      NULL for none
+ */
+extern void *VG_(SkipNode_Alloc) (const SkipList *l);
+extern void  VG_(SkipNode_Free)  (const SkipList *l, void *p);
+extern void *VG_(SkipNode_First) (const SkipList *l);
+extern void *VG_(SkipNode_Next)  (const SkipList *l, void *data);
+
+/*====================================================================*/
 /*=== Functions for shadow registers                               ===*/
 /*====================================================================*/
 
@@ -1603,9 +1712,10 @@
 /*=== Specific stuff for replacing malloc() and friends            ===*/
 /*====================================================================*/
 
-/* If a skin replaces malloc() et al, the easiest way to do so is to link
-   with coregrind/vg_replace_malloc.c, and follow the following instructions.
-   You can do it from scratch, though, if you enjoy that sort of thing. */
+/* If a skin replaces malloc() et al, the easiest way to do so is to
+   link with vg_replace_malloc.o into its vgpreload_*.so file, and
+   follow the instructions below.  You can do it from scratch,
+   though, if you enjoy that sort of thing. */
 
 /* Arena size for valgrind's own malloc();  default value is 0, but can
    be overridden by skin -- but must be done so *statically*, eg:
@@ -1616,18 +1726,6 @@
    for example, be controlled with a command line option, unfortunately. */
 extern UInt VG_(vg_malloc_redzone_szB);
 
-/* If a skin links with vg_replace_malloc.c, the following functions will be
-   called appropriately when malloc() et al are called. */
-extern void* SK_(malloc)               ( Int n );
-extern void* SK_(__builtin_new)        ( Int n );
-extern void* SK_(__builtin_vec_new)    ( Int n );
-extern void* SK_(memalign)             ( Int align, Int n );
-extern void* SK_(calloc)               ( Int nmemb, Int n );
-extern void  SK_(free)                 ( void* p );
-extern void  SK_(__builtin_delete)     ( void* p );
-extern void  SK_(__builtin_vec_delete) ( void* p );
-extern void* SK_(realloc)              ( void* p, Int size );
-
 /* Can be called from SK_(malloc) et al to do the actual alloc/freeing. */
 extern void* VG_(cli_malloc) ( UInt align, Int nbytes );
 extern void  VG_(cli_free)   ( void* p );
@@ -1737,6 +1835,13 @@
 /* Do we need to see data symbols? */
 extern void VG_(needs_data_syms) ( void );
 
+/* Does the skin need shadow memory allocated?  If you set this, you must also statically initialize
+   float SK_(shadow_ratio) = n./m;
+   to define how many shadow bits you need per client address space bit.
+*/
+extern void VG_(needs_shadow_memory)( void );
+extern float SK_(shadow_ratio);
+
 /* ------------------------------------------------------------------ */
 /* Core events to track */
 
@@ -1746,299 +1851,6 @@
    enum { Vg_CorePThread, Vg_CoreSignal, Vg_CoreSysCall, Vg_CoreTranslate }
    CorePart;
 
-#define EV  extern void
-
-/* Events happening in core to track.  To be notified, pass a callback
-   function to the appropriate function.  To ignore an event, don't do
-   anything (default is for events to be ignored).
-
-   Note that most events aren't passed a ThreadId.  To find out the ThreadId
-   of the affected thread, use VG_(get_current_or_recent_tid)().  For the
-   ones passed a ThreadId, use that instead, since
-   VG_(get_current_or_recent_tid)() might not give the right ThreadId in
-   that case.
-*/
-
-
-/* Memory events (Nb: to track heap allocation/freeing, a skin must replace
-   malloc() et al.  See above how to do this.) */
-
-/* These ones occur at startup, upon some signals, and upon some syscalls */
-EV VG_(track_new_mem_startup) ( void (*f)(Addr a, UInt len,
-                                          Bool rr, Bool ww, Bool xx) );
-EV VG_(track_new_mem_stack_signal)  ( void (*f)(Addr a, UInt len) );
-EV VG_(track_new_mem_brk)     ( void (*f)(Addr a, UInt len) );
-EV VG_(track_new_mem_mmap)    ( void (*f)(Addr a, UInt len,
-                                          Bool rr, Bool ww, Bool xx) );
-
-EV VG_(track_copy_mem_remap)  ( void (*f)(Addr from, Addr to, UInt len) );
-EV VG_(track_change_mem_mprotect) ( void (*f)(Addr a, UInt len,
-                                              Bool rr, Bool ww, Bool xx) );
-EV VG_(track_die_mem_stack_signal)  ( void (*f)(Addr a, UInt len) );
-EV VG_(track_die_mem_brk)     ( void (*f)(Addr a, UInt len) );
-EV VG_(track_die_mem_munmap)  ( void (*f)(Addr a, UInt len) );
-
-
-/* These ones are called when %esp changes.  A skin could track these itself
-   (except for ban_mem_stack) but it's much easier to use the core's help.
-
-   The specialised ones are called in preference to the general one, if they
-   are defined.  These functions are called a lot if they are used, so
-   specialising can optimise things significantly.  If any of the
-   specialised cases are defined, the general case must be defined too.
-
-   Nb: they must all use the __attribute__((regparm(n))) attribute. */
-EV VG_(track_new_mem_stack_4)  ( void (*f)(Addr new_ESP) );
-EV VG_(track_new_mem_stack_8)  ( void (*f)(Addr new_ESP) );
-EV VG_(track_new_mem_stack_12) ( void (*f)(Addr new_ESP) );
-EV VG_(track_new_mem_stack_16) ( void (*f)(Addr new_ESP) );
-EV VG_(track_new_mem_stack_32) ( void (*f)(Addr new_ESP) );
-EV VG_(track_new_mem_stack)    ( void (*f)(Addr a, UInt len) );
-
-EV VG_(track_die_mem_stack_4)  ( void (*f)(Addr die_ESP) );
-EV VG_(track_die_mem_stack_8)  ( void (*f)(Addr die_ESP) );
-EV VG_(track_die_mem_stack_12) ( void (*f)(Addr die_ESP) );
-EV VG_(track_die_mem_stack_16) ( void (*f)(Addr die_ESP) );
-EV VG_(track_die_mem_stack_32) ( void (*f)(Addr die_ESP) );
-EV VG_(track_die_mem_stack)    ( void (*f)(Addr a, UInt len) );
-
-/* Used for redzone at end of thread stacks */
-EV VG_(track_ban_mem_stack)   ( void (*f)(Addr a, UInt len) );
-
-/* These ones occur around syscalls, signal handling, etc */
-EV VG_(track_pre_mem_read)    ( void (*f)(CorePart part, ThreadId tid,
-                                          Char* s, Addr a, UInt size) );
-EV VG_(track_pre_mem_read_asciiz) ( void (*f)(CorePart part, ThreadId tid,
-                                              Char* s, Addr a) );
-EV VG_(track_pre_mem_write)   ( void (*f)(CorePart part, ThreadId tid,
-                                          Char* s, Addr a, UInt size) );
-/* Not implemented yet -- have to add in lots of places, which is a
-   pain.  Won't bother unless/until there's a need. */
-/* EV VG_(track_post_mem_read)  ( void (*f)(ThreadId tid, Char* s,
-                                            Addr a, UInt size) ); */
-EV VG_(track_post_mem_write) ( void (*f)(Addr a, UInt size) );
-
-
-/* Register events -- if `shadow_regs' need is set, all should probably be
-   used.  Use VG_(set_thread_shadow_archreg)() to set the shadow of the
-   changed register. */
-
-/* Use VG_(set_shadow_archreg)() to set the eight general purpose regs,
-   and use VG_(set_shadow_eflags)() to set eflags. */
-EV VG_(track_post_regs_write_init)  ( void (*f)() );
-
-/* Use VG_(set_thread_shadow_archreg)() to set the shadow regs for these
-   events. */
-EV VG_(track_post_reg_write_syscall_return)
-                                    ( void (*f)(ThreadId tid, UInt reg) );
-EV VG_(track_post_reg_write_deliver_signal)
-                                    ( void (*f)(ThreadId tid, UInt reg) );
-EV VG_(track_post_reg_write_pthread_return)
-                                    ( void (*f)(ThreadId tid, UInt reg) );
-EV VG_(track_post_reg_write_clientreq_return)
-                                    ( void (*f)(ThreadId tid, UInt reg) );
-   /* This one is called for malloc() et al if they are replaced by a skin. */
-EV VG_(track_post_reg_write_clientcall_return)
-                                    ( void (*f)(ThreadId tid, UInt reg,
-                                                Addr called_function) );
-
-
-/* Scheduler events (not exhaustive) */
-
-EV VG_(track_thread_run) ( void (*f)(ThreadId tid) );
-
-/* Thread events (not exhaustive) */
-
-/* Called during thread create, before the new thread has run any
-   instructions (or touched any memory). */
-EV VG_(track_post_thread_create)( void (*f)(ThreadId tid, ThreadId child) );
-/* Called once the joinee thread is terminated and the joining thread is
-   about to resume. */
-EV VG_(track_post_thread_join)  ( void (*f)(ThreadId joiner, ThreadId joinee) );
-
-
-/* Mutex events (not exhaustive) */
-
-/* Called before a thread can block while waiting for a mutex (called
-   regardless of whether the thread will block or not). */
-EV VG_(track_pre_mutex_lock)    ( void (*f)(ThreadId tid,
-                                          void* /*pthread_mutex_t* */ mutex) );
-/* Called once the thread actually holds the mutex (always paired with
-   pre_mutex_lock). */
-EV VG_(track_post_mutex_lock)   ( void (*f)(ThreadId tid,
-                                          void* /*pthread_mutex_t* */ mutex) );
-/* Called after a thread has released a mutex (no need for a corresponding
-   pre_mutex_unlock, because unlocking can't block). */
-EV VG_(track_post_mutex_unlock) ( void (*f)(ThreadId tid,
-                                          void* /*pthread_mutex_t* */ mutex) );
-
-
-/* Signal events (not exhaustive) */
-
-/* ... pre_send_signal, post_send_signal ... */
-
-/* Called before a signal is delivered;  `alt_stack' indicates if it is
-   delivered on an alternative stack. */
-EV VG_(track_pre_deliver_signal)  ( void (*f)(ThreadId tid, Int sigNum,
-                                             Bool alt_stack) );
-/* Called after a signal is delivered.  Nb: unfortunately, if the signal
-   handler longjmps, this won't be called. */
-EV VG_(track_post_deliver_signal) ( void (*f)(ThreadId tid, Int sigNum ) );
-
-
-/* Others... condition variables... */
-/* ... */
-
-#undef EV
-
-/* ------------------------------------------------------------------ */
-/* Template functions */
-
-/* These are the parameterised functions in the core.  The default definitions
-   are overridden by LD_PRELOADed skin version.  At the very least, a skin
-   must define the fundamental template functions.  Depending on what needs
-   are set, extra template functions will be used too.  Functions are
-   grouped under the needs that govern their use. */
-
-
-/* ------------------------------------------------------------------ */
-/* Fundamental template functions */
-
-/* Initialise skin.   Must do the following:
-     - initialise the `details' struct, via the VG_(details_*)() functions
-     - register any helpers called by generated code
-
-   May do the following:
-     - initialise the `needs' struct to indicate certain requirements, via
-       the VG_(needs_*)() functions
-     - initialise the `track' struct to indicate core events of interest, via
-       the VG_(track_*)() functions
-     - register any skin-specific profiling events
-     - any other skin-specific initialisation
-*/
-extern void        SK_(pre_clo_init) ( void );
-
-/* Do initialisation that can only be done after command line processing. */
-extern void        SK_(post_clo_init)( void );
-
-/* Instrument a basic block.  Must be a true function, ie. the same input
-   always results in the same output, because basic blocks can be
-   retranslated.  Unless you're doing something really strange...
-   'orig_addr' is the address of the first instruction in the block. */
-extern UCodeBlock* SK_(instrument)   ( UCodeBlock* cb, Addr orig_addr );
-
-/* Finish up, print out any results, etc.  `exitcode' is program's exit
-   code.  The shadow (if the `shadow_regs' need is set) can be found with
-   VG_(get_shadow_archreg)(R_EBX), since %ebx holds the argument to the
-   exit() syscall.  */
-extern void        SK_(fini)         ( Int exitcode );
-
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).core_errors */
-
-/* (none needed) */
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).skin_errors */
-
-/* Identify if two errors are equal, or equal enough.  `res' indicates how
-   close is "close enough".  `res' should be passed on as necessary, eg. if
-   the Error's `extra' part contains an ExeContext, `res' should be
-   passed to VG_(eq_ExeContext)() if the ExeContexts are considered.  Other
-   than that, probably don't worry about it unless you have lots of very
-   similar errors occurring.
- */
-extern Bool SK_(eq_SkinError) ( VgRes res, Error* e1, Error* e2 );
-
-/* Print error context. */
-extern void SK_(pp_SkinError) ( Error* err );
-
-/* Should fill in any details that could be postponed until after the
-   decision whether to ignore the error (ie. details not affecting the
-   result of SK_(eq_SkinError)()).  This saves time when errors are ignored.
-   Yuk.
-
-   Return value: must be the size of the `extra' part in bytes -- used by
-   the core to make a copy.
-*/
-extern UInt SK_(update_extra) ( Error* err );
-
-/* Return value indicates recognition.  If recognised, must set skind using
-   VG_(set_supp_kind)(). */
-extern Bool SK_(recognised_suppression) ( Char* name, Supp* su );
-
-/* Read any extra info for this suppression kind.  Most likely for filling
-   in the `extra' and `string' parts (with VG_(set_supp_{extra,string})())
-   of a suppression if necessary.  Should return False if a syntax error
-   occurred, True otherwise. */
-extern Bool SK_(read_extra_suppression_info) ( Int fd, Char* buf, Int nBuf,
-                                               Supp* su );
-
-/* This should just check the kinds match and maybe some stuff in the
-   `string' and `extra' field if appropriate (using VG_(get_supp_*)() to
-   get the relevant suppression parts). */
-extern Bool SK_(error_matches_suppression) ( Error* err, Supp* su );
-
-/* This should return the suppression name, for --gen-suppressions, or NULL
-   if that error type cannot be suppressed.  This is the inverse of
-   SK_(recognised_suppression)(). */
-extern Char* SK_(get_error_name) ( Error* err );
-
-/* This should print any extra info for the error, for --gen-suppressions,
-   including the newline.  This is the inverse of
-   SK_(read_extra_suppression_info)(). */
-extern void SK_(print_extra_suppression_info) ( Error* err );
-
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).basic_block_discards */
-
-/* Should discard any information that pertains to specific basic blocks
-   or instructions within the address range given. */
-extern void SK_(discard_basic_block_info) ( Addr a, UInt size );
-
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).shadow_regs */
-
-/* No functions must be defined, but the post_reg[s]_write_* events should
-   be tracked. */
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).command_line_options */
-
-/* Return True if option was recognised.  Presumably sets some state to
-   record the option as well. */
-extern Bool SK_(process_cmd_line_option) ( Char* argv );
-
-/* Print out command line usage for options for normal skin operation. */
-extern void SK_(print_usage)             ( void );
-
-/* Print out command line usage for options for debugging the skin. */
-extern void SK_(print_debug_usage)       ( void );
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).client_requests */
-
-/* If using client requests, the number of the first request should be equal
-   to VG_USERREQ_SKIN_BASE('X','Y'), where 'X' and 'Y' form a suitable two
-   character identification for the string.  The second and subsequent
-   requests should follow. */
-
-/* This function should use the VG_IS_SKIN_USERREQ macro (in
-   include/valgrind.h) to first check if it's a request for this skin.  Then
-   should handle it if it's recognised (and return True), or return False if
-   not recognised.  arg_block[0] holds the request number, any further args
-   from the request are in arg_block[1..].  'ret' is for the return value...
-   it should probably be filled, if only with 0. */
-extern Bool SK_(handle_client_request) ( ThreadId tid, UInt* arg_block,
-                                         UInt *ret );
-
-
-/* ------------------------------------------------------------------ */
-/* VG_(needs).extends_UCode */
-
 /* Useful to use in VG_(get_Xreg_usage)() */
 #define VG_UINSTR_READS_REG(ono,regs,isWrites)  \
    { if (mycat(u->tag,ono) == tag)              \
@@ -2055,42 +1867,654 @@
         }                                       \
    }
 
-/* 'X' prefix indicates eXtended UCode. */
-extern Int   SK_(get_Xreg_usage) ( UInstr* u, Tag tag, Int* regs,
-                                   Bool* isWrites );
-extern void  SK_(emit_XUInstr)   ( UInstr* u, RRegSet regs_live_before );
-extern Bool  SK_(sane_XUInstr)   ( Bool beforeRA, Bool beforeLiveness,
-                                   UInstr* u );
-extern Char* SK_(name_XUOpcode)  ( Opcode opc );
-extern void  SK_(pp_XUInstr)     ( UInstr* u );
+#endif   /* NDEF __VG_SKIN_H */
 
+/* gen_toolint.pl will put the VG_(init_*)() functions here: */
+/* Generated by "gen_toolint.pl toolproto" */
 
-/* ------------------------------------------------------------------ */
-/* VG_(needs).syscall_wrapper */
+/* These are the parameterised functions in the core.  The default definitions
+   are overridden by LD_PRELOADed skin version.  At the very least, a skin
+   must define the fundamental template functions.  Depending on what needs
+   are set, extra template functions will be used too.  Functions are
+   grouped under the needs that govern their use.
 
-/* If either of the pre_ functions malloc() something to return, the
- * corresponding post_ function had better free() it!
+   ------------------------------------------------------------------
+   Fundamental template functions
+
+   Do initialisation that can only be done after command line processing.
  */
-extern void* SK_( pre_syscall) ( ThreadId tid, UInt syscallno,
-                                 Bool is_blocking );
-extern void  SK_(post_syscall) ( ThreadId tid, UInt syscallno,
-                                 void* pre_result, Int res,
-                                 Bool is_blocking );
+void SK_(post_clo_init)(void);
+
+/* Instrument a basic block.  Must be a true function, ie. the same input
+   always results in the same output, because basic blocks can be
+   retranslated.  Unless you're doing something really strange...
+   'orig_addr' is the address of the first instruction in the block.
+ */
+UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr orig_addr);
+
+/* Finish up, print out any results, etc.  `exitcode' is program's exit
+   code.  The shadow (if the `shadow_regs' need is set) can be found with
+   VG_(get_shadow_archreg)(R_EBX), since %ebx holds the argument to the
+   exit() syscall.
+ */
+void SK_(fini)(Int exitcode);
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).core_errors
+
+   (none needed)
+
+   ------------------------------------------------------------------
+   VG_(needs).skin_errors
+
+   Identify if two errors are equal, or equal enough.  `res' indicates how
+   close is "close enough".  `res' should be passed on as necessary, eg. if
+   the Error's `extra' part contains an ExeContext, `res' should be
+   passed to VG_(eq_ExeContext)() if the ExeContexts are considered.  Other
+   than that, probably don't worry about it unless you have lots of very
+   similar errors occurring.
+ */
+Bool SK_(eq_SkinError)(VgRes res, Error* e1, Error* e2);
+
+/* Print error context. */
+void SK_(pp_SkinError)(Error* err);
+
+/* Should fill in any details that could be postponed until after the
+   decision whether to ignore the error (ie. details not affecting the
+   result of SK_(eq_SkinError)()).  This saves time when errors are ignored.
+   Yuk.
+
+   Return value: must be the size of the `extra' part in bytes -- used by
+   the core to make a copy.
+ */
+UInt SK_(update_extra)(Error* err);
+
+/* Return value indicates recognition.  If recognised, must set skind using
+   VG_(set_supp_kind)().
+ */
+Bool SK_(recognised_suppression)(Char* name, Supp* su);
+
+/* Read any extra info for this suppression kind.  Most likely for filling
+   in the `extra' and `string' parts (with VG_(set_supp_{extra, string})())
+   of a suppression if necessary.  Should return False if a syntax error
+   occurred, True otherwise.
+ */
+Bool SK_(read_extra_suppression_info)(Int fd, Char* buf, Int nBuf, Supp* su);
+
+/* This should just check the kinds match and maybe some stuff in the
+   `string' and `extra' field if appropriate (using VG_(get_supp_*)() to
+   get the relevant suppression parts).
+ */
+Bool SK_(error_matches_suppression)(Error* err, Supp* su);
+
+/* This should return the suppression name, for --gen-suppressions, or NULL
+   if that error type cannot be suppressed.  This is the inverse of
+   SK_(recognised_suppression)().
+ */
+Char* SK_(get_error_name)(Error* err);
+
+/* This should print any extra info for the error, for --gen-suppressions,
+   including the newline.  This is the inverse of
+   SK_(read_extra_suppression_info)().
+ */
+void SK_(print_extra_suppression_info)(Error* err);
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).basic_block_discards
+
+   Should discard any information that pertains to specific basic blocks
+   or instructions within the address range given.
+ */
+void SK_(discard_basic_block_info)(Addr a, UInt size);
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).shadow_regs
+
+   No functions must be defined, but the post_reg[s]_write_* events should
+   be tracked.
+
+   ------------------------------------------------------------------
+   VG_(needs).command_line_options
+
+   Return True if option was recognised.  Presumably sets some state to
+   record the option as well.
+ */
+Bool SK_(process_cmd_line_option)(Char* argv);
+
+/* Print out command line usage for options for normal skin operation. */
+void SK_(print_usage)(void);
+
+/* Print out command line usage for options for debugging the skin. */
+void SK_(print_debug_usage)(void);
+
+/* ------------------------------------------------------------------
+   VG_(needs).client_requests
+
+   If using client requests, the number of the first request should be equal
+   to VG_USERREQ_SKIN_BASE('X', 'Y'), where 'X' and 'Y' form a suitable two
+   character identification for the string.  The second and subsequent
+   requests should follow.
+
+   This function should use the VG_IS_SKIN_USERREQ macro (in
+   include/valgrind.h) to first check if it's a request for this skin.  Then
+   should handle it if it's recognised (and return True), or return False if
+   not recognised.  arg_block[0] holds the request number, any further args
+   from the request are in arg_block[1..].  'ret' is for the return value...
+   it should probably be filled, if only with 0.
+ */
+Bool SK_(handle_client_request)(ThreadId tid, UInt* arg_block, UInt* ret);
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).extends_UCode
+
+   'X' prefix indicates eXtended UCode.
+ */
+Int SK_(get_Xreg_usage)(UInstr* u, Tag tag, Int* regs, Bool* isWrites);
+void SK_(emit_XUInstr)(UInstr* u, RRegSet regs_live_before);
+Bool SK_(sane_XUInstr)(Bool beforeRA, Bool beforeLiveness, UInstr* u);
+Char * SK_(name_XUOpcode)(Opcode opc);
+void SK_(pp_XUInstr)(UInstr* u);
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).syscall_wrapper
+
+   If either of the pre_ functions malloc() something to return, the
+   corresponding post_ function had better free() it!
+
+ */
+void * SK_(pre_syscall)(ThreadId tid, UInt syscallno, Bool is_blocking);
+void SK_(post_syscall)(ThreadId tid, UInt syscallno, void* pre_result, Int res, Bool is_blocking);
 
 
 /* ---------------------------------------------------------------------
-   VG_(needs).sanity_checks */
+     VG_(needs).sanity_checks
 
-/* Can be useful for ensuring a skin's correctness.  SK_(cheap_sanity_check)
+   Can be useful for ensuring a skin's correctness.  SK_(cheap_sanity_check)
    is called very frequently;  SK_(expensive_sanity_check) is called less
-   frequently and can be more involved. */
-extern Bool SK_(cheap_sanity_check)     ( void );
-extern Bool SK_(expensive_sanity_check) ( void );
+   frequently and can be more involved.
+ */
+Bool SK_(cheap_sanity_check)(void);
+Bool SK_(expensive_sanity_check)(void);
 
 
-#endif   /* NDEF __VG_SKIN_H */
+/* ================================================================================
+   Event tracking functions
 
-/*--------------------------------------------------------------------*/
-/*--- end                                                vg_skin.h ---*/
-/*--------------------------------------------------------------------*/
+   Events happening in core to track.  To be notified, pass a callback
+   function to the appropriate function.  To ignore an event, don't do
+   anything (default is for events to be ignored).
 
+   Note that most events aren't passed a ThreadId.  To find out the ThreadId
+   of the affected thread, use VG_(get_current_or_recent_tid)().  For the
+   ones passed a ThreadId, use that instead, since
+   VG_(get_current_or_recent_tid)() might not give the right ThreadId in
+   that case.
+
+   Memory events (Nb: to track heap allocation/freeing, a skin must replace
+   malloc() et al.  See above how to do this.)
+
+   These ones occur at startup, upon some signals, and upon some syscalls
+ */
+void SK_(new_mem_startup)(Addr a, UInt len, Bool rr, Bool ww, Bool xx);
+void SK_(new_mem_stack_signal)(Addr a, UInt len);
+void SK_(new_mem_brk)(Addr a, UInt len);
+void SK_(new_mem_mmap)(Addr a, UInt len, Bool rr, Bool ww, Bool xx);
+
+void SK_(copy_mem_remap)(Addr from, Addr to, UInt len);
+void SK_(change_mem_mprotect)(Addr a, UInt len, Bool rr, Bool ww, Bool xx);
+void SK_(die_mem_stack_signal)(Addr a, UInt len);
+void SK_(die_mem_brk)(Addr a, UInt len);
+void SK_(die_mem_munmap)(Addr a, UInt len);
+
+/* These ones are called when %esp changes.  A skin could track these itself
+   (except for ban_mem_stack) but it's much easier to use the core's help.
+
+   The specialised ones are called in preference to the general one, if they
+   are defined.  These functions are called a lot if they are used, so
+   specialising can optimise things significantly.  If any of the
+   specialised cases are defined, the general case must be defined too.
+
+   Nb: they must all use the __attribute__((regparm(n))) attribute.
+ */
+void SK_(new_mem_stack_4)(Addr new_ESP);
+void SK_(new_mem_stack_8)(Addr new_ESP);
+void SK_(new_mem_stack_12)(Addr new_ESP);
+void SK_(new_mem_stack_16)(Addr new_ESP);
+void SK_(new_mem_stack_32)(Addr new_ESP);
+void SK_(new_mem_stack)(Addr a, UInt len);
+
+void SK_(die_mem_stack_4)(Addr die_ESP);
+void SK_(die_mem_stack_8)(Addr die_ESP);
+void SK_(die_mem_stack_12)(Addr die_ESP);
+void SK_(die_mem_stack_16)(Addr die_ESP);
+void SK_(die_mem_stack_32)(Addr die_ESP);
+void SK_(die_mem_stack)(Addr a, UInt len);
+
+/* Used for redzone at end of thread stacks */
+void SK_(ban_mem_stack)(Addr a, UInt len);
+
+/* These ones occur around syscalls, signal handling, etc */
+void SK_(pre_mem_read)(CorePart part, ThreadId tid, Char* s, Addr a, UInt size);
+void SK_(pre_mem_read_asciiz)(CorePart part, ThreadId tid, Char* s, Addr a);
+void SK_(pre_mem_write)(CorePart part, ThreadId tid, Char* s, Addr a, UInt size);
+/* Not implemented yet -- have to add in lots of places, which is a
+   pain.  Won't bother unless/until there's a need.
+   void (*post_mem_read)  ( ThreadState* tst, Char* s, Addr a, UInt size );
+ */
+void SK_(post_mem_write)(Addr a, UInt size);
+
+
+/* Register events -- if `shadow_regs' need is set, all should probably be
+   used.  Use VG_(set_thread_shadow_archreg)() to set the shadow of the
+   changed register.
+
+   Use VG_(set_shadow_archreg)() to set the eight general purpose regs,
+   and use VG_(set_shadow_eflags)() to set eflags.
+ */
+void SK_(post_regs_write_init)(void);
+
+/* Use VG_(set_thread_shadow_archreg)() to set the shadow regs for these
+   events.
+ */
+void SK_(post_reg_write_syscall_return)(ThreadId tid, UInt reg);
+void SK_(post_reg_write_deliver_signal)(ThreadId tid, UInt reg);
+void SK_(post_reg_write_pthread_return)(ThreadId tid, UInt reg);
+void SK_(post_reg_write_clientreq_return)(ThreadId tid, UInt reg);
+/* This one is called for malloc() et al if they are replaced by a skin. */
+void SK_(post_reg_write_clientcall_return)(ThreadId tid, UInt reg, Addr f);
+
+
+/* Scheduler events (not exhaustive) */
+void SK_(thread_run)(ThreadId tid);
+
+
+/* Thread events (not exhaustive)
+
+   Called during thread create, before the new thread has run any
+   instructions (or touched any memory).
+ */
+void SK_(post_thread_create)(ThreadId tid, ThreadId child);
+void SK_(post_thread_join)(ThreadId joiner, ThreadId joinee);
+
+
+/* Mutex events (not exhaustive)
+   "void *mutex" is really a pthread_mutex *
+
+   Called before a thread can block while waiting for a mutex (called
+   regardless of whether the thread will block or not).
+ */
+void SK_(pre_mutex_lock)(ThreadId tid, void* mutex);
+/* Called once the thread actually holds the mutex (always paired with
+   pre_mutex_lock).
+ */
+void SK_(post_mutex_lock)(ThreadId tid, void* mutex);
+/* Called after a thread has released a mutex (no need for a corresponding
+   pre_mutex_unlock, because unlocking can't block).
+ */
+void SK_(post_mutex_unlock)(ThreadId tid, void* mutex);
+
+/* Signal events (not exhaustive)
+
+   ... pre_send_signal, post_send_signal ...
+
+   Called before a signal is delivered;  `alt_stack' indicates if it is
+   delivered on an alternative stack.
+ */
+void SK_(pre_deliver_signal)(ThreadId tid, Int sigNo, Bool alt_stack);
+/* Called after a signal is delivered.  Nb: unfortunately, if the signal
+   handler longjmps, this won't be called.
+ */
+void SK_(post_deliver_signal)(ThreadId tid, Int sigNo);
+
+
+/* Others... condition variable...
+   ...
+
+   Shadow memory management
+ */
+void SK_(init_shadow_page)(Addr p);
+
+/* ================================================================================
+   malloc and friends
+ */
+void* SK_(malloc)(Int n);
+void* SK_(__builtin_new)(Int n);
+void* SK_(__builtin_vec_new)(Int n);
+void* SK_(memalign)(Int align, Int n);
+void* SK_(calloc)(Int nmemb, Int n);
+void SK_(free)(void* p);
+void SK_(__builtin_delete)(void* p);
+void SK_(__builtin_vec_delete)(void* p);
+void* SK_(realloc)(void* p, Int size);
+/* Generated by "gen_toolint.pl initproto" */
+
+#ifndef VG_toolint_initproto
+#define VG_toolint_initproto
+
+
+/* These are the parameterised functions in the core.  The default definitions
+   are overridden by LD_PRELOADed skin version.  At the very least, a skin
+   must define the fundamental template functions.  Depending on what needs
+   are set, extra template functions will be used too.  Functions are
+   grouped under the needs that govern their use.
+
+   ------------------------------------------------------------------
+   Fundamental template functions
+
+   Do initialisation that can only be done after command line processing.
+ */
+void VG_(init_post_clo_init)(void (*func)(void));
+
+/* Instrument a basic block.  Must be a true function, ie. the same input
+   always results in the same output, because basic blocks can be
+   retranslated.  Unless you're doing something really strange...
+   'orig_addr' is the address of the first instruction in the block.
+ */
+void VG_(init_instrument)(UCodeBlock* (*func)(UCodeBlock* cb, Addr orig_addr));
+
+/* Finish up, print out any results, etc.  `exitcode' is program's exit
+   code.  The shadow (if the `shadow_regs' need is set) can be found with
+   VG_(get_shadow_archreg)(R_EBX), since %ebx holds the argument to the
+   exit() syscall.
+ */
+void VG_(init_fini)(void (*func)(Int exitcode));
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).core_errors
+
+   (none needed)
+
+   ------------------------------------------------------------------
+   VG_(needs).skin_errors
+
+   Identify if two errors are equal, or equal enough.  `res' indicates how
+   close is "close enough".  `res' should be passed on as necessary, eg. if
+   the Error's `extra' part contains an ExeContext, `res' should be
+   passed to VG_(eq_ExeContext)() if the ExeContexts are considered.  Other
+   than that, probably don't worry about it unless you have lots of very
+   similar errors occurring.
+ */
+void VG_(init_eq_SkinError)(Bool (*func)(VgRes res, Error* e1, Error* e2));
+
+/* Print error context. */
+void VG_(init_pp_SkinError)(void (*func)(Error* err));
+
+/* Should fill in any details that could be postponed until after the
+   decision whether to ignore the error (ie. details not affecting the
+   result of SK_(eq_SkinError)()).  This saves time when errors are ignored.
+   Yuk.
+
+   Return value: must be the size of the `extra' part in bytes -- used by
+   the core to make a copy.
+ */
+void VG_(init_update_extra)(UInt (*func)(Error* err));
+
+/* Return value indicates recognition.  If recognised, must set skind using
+   VG_(set_supp_kind)().
+ */
+void VG_(init_recognised_suppression)(Bool (*func)(Char* name, Supp* su));
+
+/* Read any extra info for this suppression kind.  Most likely for filling
+   in the `extra' and `string' parts (with VG_(set_supp_{extra, string})())
+   of a suppression if necessary.  Should return False if a syntax error
+   occurred, True otherwise.
+ */
+void VG_(init_read_extra_suppression_info)(Bool (*func)(Int fd, Char* buf, Int nBuf, Supp* su));
+
+/* This should just check the kinds match and maybe some stuff in the
+   `string' and `extra' field if appropriate (using VG_(get_supp_*)() to
+   get the relevant suppression parts).
+ */
+void VG_(init_error_matches_suppression)(Bool (*func)(Error* err, Supp* su));
+
+/* This should return the suppression name, for --gen-suppressions, or NULL
+   if that error type cannot be suppressed.  This is the inverse of
+   SK_(recognised_suppression)().
+ */
+void VG_(init_get_error_name)(Char* (*func)(Error* err));
+
+/* This should print any extra info for the error, for --gen-suppressions,
+   including the newline.  This is the inverse of
+   SK_(read_extra_suppression_info)().
+ */
+void VG_(init_print_extra_suppression_info)(void (*func)(Error* err));
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).basic_block_discards
+
+   Should discard any information that pertains to specific basic blocks
+   or instructions within the address range given.
+ */
+void VG_(init_discard_basic_block_info)(void (*func)(Addr a, UInt size));
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).shadow_regs
+
+   No functions must be defined, but the post_reg[s]_write_* events should
+   be tracked.
+
+   ------------------------------------------------------------------
+   VG_(needs).command_line_options
+
+   Return True if option was recognised.  Presumably sets some state to
+   record the option as well.
+ */
+void VG_(init_process_cmd_line_option)(Bool (*func)(Char* argv));
+
+/* Print out command line usage for options for normal skin operation. */
+void VG_(init_print_usage)(void (*func)(void));
+
+/* Print out command line usage for options for debugging the skin. */
+void VG_(init_print_debug_usage)(void (*func)(void));
+
+/* ------------------------------------------------------------------
+   VG_(needs).client_requests
+
+   If using client requests, the number of the first request should be equal
+   to VG_USERREQ_SKIN_BASE('X', 'Y'), where 'X' and 'Y' form a suitable two
+   character identification for the string.  The second and subsequent
+   requests should follow.
+
+   This function should use the VG_IS_SKIN_USERREQ macro (in
+   include/valgrind.h) to first check if it's a request for this skin.  Then
+   should handle it if it's recognised (and return True), or return False if
+   not recognised.  arg_block[0] holds the request number, any further args
+   from the request are in arg_block[1..].  'ret' is for the return value...
+   it should probably be filled, if only with 0.
+ */
+void VG_(init_handle_client_request)(Bool (*func)(ThreadId tid, UInt* arg_block, UInt* ret));
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).extends_UCode
+
+   'X' prefix indicates eXtended UCode.
+ */
+void VG_(init_get_Xreg_usage)(Int (*func)(UInstr* u, Tag tag, Int* regs, Bool* isWrites));
+void VG_(init_emit_XUInstr)(void (*func)(UInstr* u, RRegSet regs_live_before));
+void VG_(init_sane_XUInstr)(Bool (*func)(Bool beforeRA, Bool beforeLiveness, UInstr* u));
+void VG_(init_name_XUOpcode)(Char * (*func)(Opcode opc));
+void VG_(init_pp_XUInstr)(void (*func)(UInstr* u));
+
+
+/* ------------------------------------------------------------------
+   VG_(needs).syscall_wrapper
+
+   If either of the pre_ functions malloc() something to return, the
+   corresponding post_ function had better free() it!
+
+ */
+void VG_(init_pre_syscall)(void * (*func)(ThreadId tid, UInt syscallno, Bool is_blocking));
+void VG_(init_post_syscall)(void (*func)(ThreadId tid, UInt syscallno, void* pre_result, Int res, Bool is_blocking));
+
+
+/* ---------------------------------------------------------------------
+     VG_(needs).sanity_checks
+
+   Can be useful for ensuring a skin's correctness.  SK_(cheap_sanity_check)
+   is called very frequently;  SK_(expensive_sanity_check) is called less
+   frequently and can be more involved.
+ */
+void VG_(init_cheap_sanity_check)(Bool (*func)(void));
+void VG_(init_expensive_sanity_check)(Bool (*func)(void));
+
+
+/* ================================================================================
+   Event tracking functions
+
+   Events happening in core to track.  To be notified, pass a callback
+   function to the appropriate function.  To ignore an event, don't do
+   anything (default is for events to be ignored).
+
+   Note that most events aren't passed a ThreadId.  To find out the ThreadId
+   of the affected thread, use VG_(get_current_or_recent_tid)().  For the
+   ones passed a ThreadId, use that instead, since
+   VG_(get_current_or_recent_tid)() might not give the right ThreadId in
+   that case.
+
+   Memory events (Nb: to track heap allocation/freeing, a skin must replace
+   malloc() et al.  See above how to do this.)
+
+   These ones occur at startup, upon some signals, and upon some syscalls
+ */
+void VG_(init_new_mem_startup)(void (*func)(Addr a, UInt len, Bool rr, Bool ww, Bool xx));
+void VG_(init_new_mem_stack_signal)(void (*func)(Addr a, UInt len));
+void VG_(init_new_mem_brk)(void (*func)(Addr a, UInt len));
+void VG_(init_new_mem_mmap)(void (*func)(Addr a, UInt len, Bool rr, Bool ww, Bool xx));
+
+void VG_(init_copy_mem_remap)(void (*func)(Addr from, Addr to, UInt len));
+void VG_(init_change_mem_mprotect)(void (*func)(Addr a, UInt len, Bool rr, Bool ww, Bool xx));
+void VG_(init_die_mem_stack_signal)(void (*func)(Addr a, UInt len));
+void VG_(init_die_mem_brk)(void (*func)(Addr a, UInt len));
+void VG_(init_die_mem_munmap)(void (*func)(Addr a, UInt len));
+
+/* These ones are called when %esp changes.  A skin could track these itself
+   (except for ban_mem_stack) but it's much easier to use the core's help.
+
+   The specialised ones are called in preference to the general one, if they
+   are defined.  These functions are called a lot if they are used, so
+   specialising can optimise things significantly.  If any of the
+   specialised cases are defined, the general case must be defined too.
+
+   Nb: they must all use the __attribute__((regparm(n))) attribute.
+ */
+void VG_(init_new_mem_stack_4)(void (*func)(Addr new_ESP));
+void VG_(init_new_mem_stack_8)(void (*func)(Addr new_ESP));
+void VG_(init_new_mem_stack_12)(void (*func)(Addr new_ESP));
+void VG_(init_new_mem_stack_16)(void (*func)(Addr new_ESP));
+void VG_(init_new_mem_stack_32)(void (*func)(Addr new_ESP));
+void VG_(init_new_mem_stack)(void (*func)(Addr a, UInt len));
+
+void VG_(init_die_mem_stack_4)(void (*func)(Addr die_ESP));
+void VG_(init_die_mem_stack_8)(void (*func)(Addr die_ESP));
+void VG_(init_die_mem_stack_12)(void (*func)(Addr die_ESP));
+void VG_(init_die_mem_stack_16)(void (*func)(Addr die_ESP));
+void VG_(init_die_mem_stack_32)(void (*func)(Addr die_ESP));
+void VG_(init_die_mem_stack)(void (*func)(Addr a, UInt len));
+
+/* Used for redzone at end of thread stacks */
+void VG_(init_ban_mem_stack)(void (*func)(Addr a, UInt len));
+
+/* These ones occur around syscalls, signal handling, etc */
+void VG_(init_pre_mem_read)(void (*func)(CorePart part, ThreadId tid, Char* s, Addr a, UInt size));
+void VG_(init_pre_mem_read_asciiz)(void (*func)(CorePart part, ThreadId tid, Char* s, Addr a));
+void VG_(init_pre_mem_write)(void (*func)(CorePart part, ThreadId tid, Char* s, Addr a, UInt size));
+/* Not implemented yet -- have to add in lots of places, which is a
+   pain.  Won't bother unless/until there's a need.
+   void (*post_mem_read)  ( ThreadState* tst, Char* s, Addr a, UInt size );
+ */
+void VG_(init_post_mem_write)(void (*func)(Addr a, UInt size));
+
+
+/* Register events -- if the `shadow_regs' need is set, all should probably be
+   used.  Use VG_(set_thread_shadow_archreg)() to set the shadow of the
+   changed register.
+
+   Use VG_(set_shadow_archreg)() to set the eight general purpose regs,
+   and use VG_(set_shadow_eflags)() to set eflags.
+ */
+void VG_(init_post_regs_write_init)(void (*func)(void));
+
+/* Use VG_(set_thread_shadow_archreg)() to set the shadow regs for these
+   events.
+ */
+void VG_(init_post_reg_write_syscall_return)(void (*func)(ThreadId tid, UInt reg));
+void VG_(init_post_reg_write_deliver_signal)(void (*func)(ThreadId tid, UInt reg));
+void VG_(init_post_reg_write_pthread_return)(void (*func)(ThreadId tid, UInt reg));
+void VG_(init_post_reg_write_clientreq_return)(void (*func)(ThreadId tid, UInt reg));
+/* This one is called for malloc() et al if they are replaced by a skin. */
+void VG_(init_post_reg_write_clientcall_return)(void (*func)(ThreadId tid, UInt reg, Addr f));
+
+
+/* Scheduler events (not exhaustive) */
+void VG_(init_thread_run)(void (*func)(ThreadId tid));
+
+
+/* Thread events (not exhaustive)
+
+   Called during thread create, before the new thread has run any
+   instructions (or touched any memory).
+ */
+void VG_(init_post_thread_create)(void (*func)(ThreadId tid, ThreadId child));
+void VG_(init_post_thread_join)(void (*func)(ThreadId joiner, ThreadId joinee));
+
+
+/* Mutex events (not exhaustive)
+   "void *mutex" is really a pthread_mutex *
+
+   Called before a thread can block while waiting for a mutex (called
+   regardless of whether the thread will block or not).
+ */
+void VG_(init_pre_mutex_lock)(void (*func)(ThreadId tid, void* mutex));
+/* Called once the thread actually holds the mutex (always paired with
+   pre_mutex_lock).
+ */
+void VG_(init_post_mutex_lock)(void (*func)(ThreadId tid, void* mutex));
+/* Called after a thread has released a mutex (no need for a corresponding
+   pre_mutex_unlock, because unlocking can't block).
+ */
+void VG_(init_post_mutex_unlock)(void (*func)(ThreadId tid, void* mutex));
+
+/* Signal events (not exhaustive)
+
+   ... pre_send_signal, post_send_signal ...
+
+   Called before a signal is delivered;  `alt_stack' indicates if it is
+   delivered on an alternative stack.
+ */
+void VG_(init_pre_deliver_signal)(void (*func)(ThreadId tid, Int sigNo, Bool alt_stack));
+/* Called after a signal is delivered.  Nb: unfortunately, if the signal
+   handler longjmps, this won't be called.
+ */
+void VG_(init_post_deliver_signal)(void (*func)(ThreadId tid, Int sigNo));
+
+
+/* Others... condition variable...
+   ...
+
+   Shadow memory management
+ */
+void VG_(init_init_shadow_page)(void (*func)(Addr p));
+
+/* ================================================================================
+   malloc and friends
+ */
+void VG_(init_malloc)(void* (*func)(Int n));
+void VG_(init___builtin_new)(void* (*func)(Int n));
+void VG_(init___builtin_vec_new)(void* (*func)(Int n));
+void VG_(init_memalign)(void* (*func)(Int align, Int n));
+void VG_(init_calloc)(void* (*func)(Int nmemb, Int n));
+void VG_(init_free)(void (*func)(void* p));
+void VG_(init___builtin_delete)(void (*func)(void* p));
+void VG_(init___builtin_vec_delete)(void (*func)(void* p));
+void VG_(init_realloc)(void* (*func)(void* p, Int size));
+
+#endif /* VG_toolint_initproto */
diff --git a/lackey/Makefile.am b/lackey/Makefile.am
index 202ac99..1906161 100644
--- a/lackey/Makefile.am
+++ b/lackey/Makefile.am
@@ -6,9 +6,15 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
 val_PROGRAMS = vgskin_lackey.so
 
 vgskin_lackey_so_SOURCES = lk_main.c
 vgskin_lackey_so_LDFLAGS = -shared
 
+
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(inplacedir)/$(val_PROGRAMS)
+	ln -f -s $(top_srcdir)/$(subdir)/$(val_PROGRAMS) $(inplacedir)/$(val_PROGRAMS)
diff --git a/lackey/lk_main.c b/lackey/lk_main.c
index 79f9b9d..66e9132 100644
--- a/lackey/lk_main.c
+++ b/lackey/lk_main.c
@@ -31,8 +31,6 @@
 
 #include "vg_skin.h"
 
-VG_DETERMINE_INTERFACE_VERSION
-
 /* Nb: use ULongs because the numbers can get very big */
 static ULong n_dlrr_calls   = 0;
 static ULong n_BBs          = 0;
@@ -224,6 +222,9 @@
     VG_(message)(Vg_UserMsg, "Exit code:     %d", exitcode);
 }
 
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 0)
+
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                lk_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/memcheck/Makefile.am b/memcheck/Makefile.am
index 3463d2c..9f92680 100644
--- a/memcheck/Makefile.am
+++ b/memcheck/Makefile.am
@@ -4,19 +4,25 @@
 all_includes = -I$(top_srcdir)/include
 
 AM_CPPFLAGS = $(all_includes) -DVG_LIBDIR="\"$(libdir)"\"
-AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fomit-frame-pointer \
+AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O2 -fomit-frame-pointer \
 		@PREFERRED_STACK_BOUNDARY@ -g
 AM_CCASFLAGS = $(all_includes)
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
-val_PROGRAMS = vgskin_memcheck.so
+val_PROGRAMS = vgskin_memcheck.so vgpreload_memcheck.so
+
+vgpreload_memcheck_so_SOURCES = \
+	mac_replace_strmem.c
+vgpreload_memcheck_so_LDADD = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_memcheck_so_DEPENDENCIES = $(top_srcdir)/coregrind/vg_replace_malloc.o
+vgpreload_memcheck_so_LDFLAGS = -shared -Wl,-z,interpose,-z,initfirst
 
 vgskin_memcheck_so_SOURCES = \
 	mac_leakcheck.c \
 	mac_malloc_wrappers.c \
 	mac_needs.c \
-	mac_replace_strmem.c \
 	mc_main.c \
 	mc_clientreqs.c \
 	mc_errcontext.c \
@@ -24,7 +30,6 @@
 	mc_translate.c \
 	mc_helpers.S
 vgskin_memcheck_so_LDFLAGS = -shared
-vgskin_memcheck_so_LDADD = ../coregrind/vg_replace_malloc.o
 
 mcincludedir = $(includedir)/valgrind
 
@@ -38,3 +43,7 @@
 
 mac_replace_strmem.o: CFLAGS += -fno-omit-frame-pointer
 
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(addprefix $(inplacedir)/,$(val_PROGRAMS))
+	ln -f -s $(addprefix $(top_srcdir)/$(subdir)/,$(val_PROGRAMS)) $(inplacedir)
diff --git a/memcheck/mac_leakcheck.c b/memcheck/mac_leakcheck.c
index a596612..a11e128 100644
--- a/memcheck/mac_leakcheck.c
+++ b/memcheck/mac_leakcheck.c
@@ -311,10 +311,7 @@
       where the .bss segment has been put.  If you can, drop me a
       line.  
    */
-   if (VG_(within_stack)(a))                      return;
-   if (VG_(within_m_state_static_OR_threads)(a))  return;
-   if (a == (Addr)(&lc_min_mallocd_addr))         return;
-   if (a == (Addr)(&lc_max_mallocd_addr))         return;
+   if (!VG_(is_client_addr)(a))			  return;
 
    /* OK, let's get on and do something Useful for a change. */
 
diff --git a/memcheck/mac_replace_strmem.c b/memcheck/mac_replace_strmem.c
index 6ae5800..12560f6 100644
--- a/memcheck/mac_replace_strmem.c
+++ b/memcheck/mac_replace_strmem.c
@@ -31,8 +31,26 @@
 */
 
 #include "mc_include.h"
+#include "memcheck.h"
 #include "valgrind.h"
 
+static Addr record_overlap_error;
+
+static int init_done;
+
+/* Startup hook - called as init section */
+static void init(void) __attribute__((constructor));
+static void init(void) 
+{
+   if (init_done)
+      return;
+
+   VALGRIND_MAGIC_SEQUENCE(record_overlap_error, 0,
+			   _VG_USERREQ__MEMCHECK_GET_RECORD_OVERLAP,
+			   0, 0, 0, 0);
+   init_done = 1;
+}
+
 /* ---------------------------------------------------------------------
    The normal versions of these functions are hyper-optimised, which fools
    Memcheck and cause spurious value warnings.  So we replace them with
@@ -80,7 +98,8 @@
    OverlapExtra extra = {
       .src = (Addr)src, .dst = (Addr)dst, .len = -1,
    };
-   VALGRIND_NON_SIMD_CALL2( MAC_(record_overlap_error), s, &extra );
+   init();
+   VALGRIND_NON_SIMD_CALL2( record_overlap_error, s, &extra );
 }
 
 static __inline__
@@ -90,7 +109,8 @@
    OverlapExtra extra = {
       .src = (Addr)src, .dst = (Addr)dst, .len = n,
    };
-   VALGRIND_NON_SIMD_CALL2( MAC_(record_overlap_error), s, &extra );
+   init();
+   VALGRIND_NON_SIMD_CALL2( record_overlap_error, s, &extra );
 }
 
 char* strrchr ( const char* s, int c )
diff --git a/memcheck/mc_clientreqs.c b/memcheck/mc_clientreqs.c
index ddfb05d..e8c078c 100644
--- a/memcheck/mc_clientreqs.c
+++ b/memcheck/mc_clientreqs.c
@@ -232,6 +232,10 @@
                    ( tid, arg[1], arg[2], arg[3], True /* set them */ );
          break;
 
+      case _VG_USERREQ__MEMCHECK_GET_RECORD_OVERLAP:
+	 *ret = (Addr)MAC_(record_overlap_error);
+	 break;
+
       default:
          if (MAC_(handle_common_client_requests)(tid, arg, ret )) {
             return True;
diff --git a/memcheck/mc_from_ucode.c b/memcheck/mc_from_ucode.c
index e1f2877..6cfe815 100644
--- a/memcheck/mc_from_ucode.c
+++ b/memcheck/mc_from_ucode.c
@@ -246,16 +246,16 @@
                                         R_EBP, reg );
          break;
       case 2: 
-         VG_(emit_movzwl_offregmem_reg) ( VG_(shadow_reg_offset)(arch),
+         VG_(emit_movzwl_offregmem_reg) ( False, VG_(shadow_reg_offset)(arch),
                                           R_EBP, reg );
          VG_(emit_nonshiftopv_lit_reg) ( False, 4, OR, 0xFFFF0000, reg );
          break;
       case 1: 
          if (arch < 4) {
-            VG_(emit_movzbl_offregmem_reg) ( VG_(shadow_reg_offset)(arch),
+            VG_(emit_movzbl_offregmem_reg) ( False, VG_(shadow_reg_offset)(arch),
                                              R_EBP, reg );
          } else {
-            VG_(emit_movzbl_offregmem_reg) ( VG_(shadow_reg_offset)(arch-4)+1,
+            VG_(emit_movzbl_offregmem_reg) ( False, VG_(shadow_reg_offset)(arch-4)+1,
                                              R_EBP, reg );
          }
          VG_(emit_nonshiftopv_lit_reg) ( False, 4, OR, 0xFFFFFF00, reg );
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 8b46797..8f3fbc8 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -34,8 +34,6 @@
 #include "memcheck.h"   /* for client requests */
 //#include "vg_profile.c"
 
-VG_DETERMINE_INTERFACE_VERSION
-
 /* Define to debug the mem audit system. */
 /* #define VG_DEBUG_MEMORY */
 
@@ -118,7 +116,6 @@
 static SecMap* primary_map[ /*65536*/ 262144 ];
 static SecMap  distinguished_secondary_map;
 
-
 static void init_shadow_memory ( void )
 {
    Int i;
@@ -157,7 +154,7 @@
       although this isn't important, so the following assert is
       spurious. */
    sk_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE));
-   map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller );
+   map = (SecMap *)VG_(shadow_alloc)(sizeof(SecMap));
 
    for (i = 0; i < 8192; i++)
       map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */
@@ -1671,6 +1668,7 @@
    VG_(needs_extended_UCode)      ();
    VG_(needs_syscall_wrapper)     ();
    VG_(needs_sanity_checks)       ();
+   VG_(needs_shadow_memory)       ();
 
    MAC_( new_mem_heap)             = & mc_new_mem_heap;
    MAC_( ban_mem_heap)             = & MC_(make_noaccess);
@@ -1678,45 +1676,45 @@
    MAC_( die_mem_heap)             = & MC_(make_noaccess);
    MAC_(check_noaccess)            = & MC_(check_noaccess);
 
-   VG_(track_new_mem_startup)      ( & mc_new_mem_startup );
-   VG_(track_new_mem_stack_signal) ( & MC_(make_writable) );
-   VG_(track_new_mem_brk)          ( & MC_(make_writable) );
-   VG_(track_new_mem_mmap)         ( & mc_set_perms );
+   VG_(init_new_mem_startup)      ( & mc_new_mem_startup );
+   VG_(init_new_mem_stack_signal) ( & MC_(make_writable) );
+   VG_(init_new_mem_brk)          ( & MC_(make_writable) );
+   VG_(init_new_mem_mmap)         ( & mc_set_perms );
    
-   VG_(track_copy_mem_remap)       ( & mc_copy_address_range_state );
-   VG_(track_change_mem_mprotect)  ( & mc_set_perms );
+   VG_(init_copy_mem_remap)       ( & mc_copy_address_range_state );
+   VG_(init_change_mem_mprotect)  ( & mc_set_perms );
       
-   VG_(track_die_mem_stack_signal) ( & MC_(make_noaccess) ); 
-   VG_(track_die_mem_brk)          ( & MC_(make_noaccess) );
-   VG_(track_die_mem_munmap)       ( & MC_(make_noaccess) ); 
+   VG_(init_die_mem_stack_signal) ( & MC_(make_noaccess) ); 
+   VG_(init_die_mem_brk)          ( & MC_(make_noaccess) );
+   VG_(init_die_mem_munmap)       ( & MC_(make_noaccess) ); 
 
-   VG_(track_new_mem_stack_4)      ( & MAC_(new_mem_stack_4)  );
-   VG_(track_new_mem_stack_8)      ( & MAC_(new_mem_stack_8)  );
-   VG_(track_new_mem_stack_12)     ( & MAC_(new_mem_stack_12) );
-   VG_(track_new_mem_stack_16)     ( & MAC_(new_mem_stack_16) );
-   VG_(track_new_mem_stack_32)     ( & MAC_(new_mem_stack_32) );
-   VG_(track_new_mem_stack)        ( & MAC_(new_mem_stack)    );
+   VG_(init_new_mem_stack_4)      ( & MAC_(new_mem_stack_4)  );
+   VG_(init_new_mem_stack_8)      ( & MAC_(new_mem_stack_8)  );
+   VG_(init_new_mem_stack_12)     ( & MAC_(new_mem_stack_12) );
+   VG_(init_new_mem_stack_16)     ( & MAC_(new_mem_stack_16) );
+   VG_(init_new_mem_stack_32)     ( & MAC_(new_mem_stack_32) );
+   VG_(init_new_mem_stack)        ( & MAC_(new_mem_stack)    );
 
-   VG_(track_die_mem_stack_4)      ( & MAC_(die_mem_stack_4)  );
-   VG_(track_die_mem_stack_8)      ( & MAC_(die_mem_stack_8)  );
-   VG_(track_die_mem_stack_12)     ( & MAC_(die_mem_stack_12) );
-   VG_(track_die_mem_stack_16)     ( & MAC_(die_mem_stack_16) );
-   VG_(track_die_mem_stack_32)     ( & MAC_(die_mem_stack_32) );
-   VG_(track_die_mem_stack)        ( & MAC_(die_mem_stack)    );
+   VG_(init_die_mem_stack_4)      ( & MAC_(die_mem_stack_4)  );
+   VG_(init_die_mem_stack_8)      ( & MAC_(die_mem_stack_8)  );
+   VG_(init_die_mem_stack_12)     ( & MAC_(die_mem_stack_12) );
+   VG_(init_die_mem_stack_16)     ( & MAC_(die_mem_stack_16) );
+   VG_(init_die_mem_stack_32)     ( & MAC_(die_mem_stack_32) );
+   VG_(init_die_mem_stack)        ( & MAC_(die_mem_stack)    );
    
-   VG_(track_ban_mem_stack)        ( & MC_(make_noaccess) );
+   VG_(init_ban_mem_stack)        ( & MC_(make_noaccess) );
 
-   VG_(track_pre_mem_read)         ( & mc_check_is_readable );
-   VG_(track_pre_mem_read_asciiz)  ( & mc_check_is_readable_asciiz );
-   VG_(track_pre_mem_write)        ( & mc_check_is_writable );
-   VG_(track_post_mem_write)       ( & MC_(make_readable) );
+   VG_(init_pre_mem_read)         ( & mc_check_is_readable );
+   VG_(init_pre_mem_read_asciiz)  ( & mc_check_is_readable_asciiz );
+   VG_(init_pre_mem_write)        ( & mc_check_is_writable );
+   VG_(init_post_mem_write)       ( & MC_(make_readable) );
 
-   VG_(track_post_regs_write_init)             ( & mc_post_regs_write_init );
-   VG_(track_post_reg_write_syscall_return)    ( & mc_post_reg_write );
-   VG_(track_post_reg_write_deliver_signal)    ( & mc_post_reg_write );
-   VG_(track_post_reg_write_pthread_return)    ( & mc_post_reg_write );
-   VG_(track_post_reg_write_clientreq_return)  ( & mc_post_reg_write );
-   VG_(track_post_reg_write_clientcall_return) ( & mc_post_reg_write_clientcall );
+   VG_(init_post_regs_write_init)             ( & mc_post_regs_write_init );
+   VG_(init_post_reg_write_syscall_return)    ( & mc_post_reg_write );
+   VG_(init_post_reg_write_deliver_signal)    ( & mc_post_reg_write );
+   VG_(init_post_reg_write_pthread_return)    ( & mc_post_reg_write );
+   VG_(init_post_reg_write_clientreq_return)  ( & mc_post_reg_write );
+   VG_(init_post_reg_write_clientcall_return) ( & mc_post_reg_write_clientcall );
 
    /* Three compact slots taken up by stack memory helpers */
    VG_(register_compact_helper)((Addr) & MC_(helper_value_check4_fail));
@@ -1760,6 +1758,8 @@
    }
 }
 
+VG_DETERMINE_INTERFACE_VERSION(SK_(pre_clo_init), 9./8)
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                mc_main.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/memcheck/memcheck.h b/memcheck/memcheck.h
index 5dc7c80..58284a0 100644
--- a/memcheck/memcheck.h
+++ b/memcheck/memcheck.h
@@ -89,7 +89,10 @@
       VG_USERREQ__FREELIKE_BLOCK__OLD_DO_NOT_USE,
 
       VG_USERREQ__GET_VBITS,
-      VG_USERREQ__SET_VBITS
+      VG_USERREQ__SET_VBITS,
+
+      /* This is just for memcheck's internal use - don't use it */
+      _VG_USERREQ__MEMCHECK_GET_RECORD_OVERLAP = VG_USERREQ_SKIN_BASE('M','C')+256,
    } Vg_MemCheckClientRequest;
 
 
diff --git a/memcheck/tests/badjump.stderr.exp b/memcheck/tests/badjump.stderr.exp
index 2bec474..a01f6ec 100644
--- a/memcheck/tests/badjump.stderr.exp
+++ b/memcheck/tests/badjump.stderr.exp
@@ -6,7 +6,7 @@
  Address 0x........ is not stack'd, malloc'd or free'd
 
 Process terminating with default action of signal 11 (SIGSEGV): dumping core
- Address not mapped to object at address 0x........
+ Access not within mapped region at address 0x........
    at 0x........: ???
    by 0x........: __libc_start_main (...libc...)
    by 0x........: ...
diff --git a/memcheck/tests/fprw.vgtest b/memcheck/tests/fprw.vgtest
index a5dfb42..6dfbf0c 100644
--- a/memcheck/tests/fprw.vgtest
+++ b/memcheck/tests/fprw.vgtest
@@ -1,2 +1,2 @@
-vgopts: --single-step=yes -q
+vgopts: -q
 prog:   fprw
diff --git a/memcheck/tests/nanoleak.supp b/memcheck/tests/nanoleak.supp
index 6c87853..584c93e 100644
--- a/memcheck/tests/nanoleak.supp
+++ b/memcheck/tests/nanoleak.supp
@@ -3,6 +3,5 @@
    Addrcheck,Memcheck:Leak
    fun:malloc
    fun:main
-   fun:__libc_start_main
 }
 
diff --git a/memcheck/tests/sigaltstack.c b/memcheck/tests/sigaltstack.c
index f310df5..279e315 100644
--- a/memcheck/tests/sigaltstack.c
+++ b/memcheck/tests/sigaltstack.c
@@ -26,6 +26,7 @@
   fprintf(stderr,"setting sigaction\n");
   act.sa_flags=SA_ONSTACK;
   act.sa_handler=&sig_handler;
+  sigemptyset(&act.sa_mask);
   res = sigaction(SIGUSR1,&act,0);
   fprintf(stderr, "res = %d\n", res);
   fprintf(stderr, "raising the signal\n");
diff --git a/memcheck/tests/sigaltstack.stderr.exp b/memcheck/tests/sigaltstack.stderr.exp
index 6c0b2fc..b95833f 100644
--- a/memcheck/tests/sigaltstack.stderr.exp
+++ b/memcheck/tests/sigaltstack.stderr.exp
@@ -1,8 +1,5 @@
 calling sigaltstack, stack base is 0x........
 setting sigaction
-Syscall param sigaction(act) contains uninitialised or unaddressable byte(s)
-   at 0x........: __libc_sigaction (...libc...)
- Address 0x........ is on thread 1's stack
 res = 0
 raising the signal
 caught signal, local var is on 0x........
diff --git a/memcheck/tests/threadederrno.c b/memcheck/tests/threadederrno.c
index bc05be5..d924ca4 100644
--- a/memcheck/tests/threadederrno.c
+++ b/memcheck/tests/threadederrno.c
@@ -2,22 +2,20 @@
 #include <pthread.h>
 #include <stdio.h>
 #include <errno.h>
-
+#include <string.h>
 
 
 void* thr2 ( void* v )
 {
   FILE* f = fopen("bogus2", "r");
-  printf("f2 = %p, errno2 = %d\n", f, errno);
-  perror("wurble2");
+  printf("f2 = %p, errno2 = %d (%s)\n", f, errno, strerror(errno));
   return NULL;
 }
 
 void* thr3 ( void* v )
 {
   FILE* f = fopen("bogus3", "r");
-  printf("f3 = %p, errno3 = %d\n", f, errno);
-  perror("wurble3");
+  printf("f3 = %p, errno3 = %d (%s)\n", f, errno, strerror(errno));
   return NULL;
 }
 
@@ -29,8 +27,7 @@
   pthread_create(&tid2, NULL, &thr2, NULL);
   pthread_create(&tid3, NULL, &thr3, NULL);
   f = fopen("bogus", "r");
-  printf("f1 = %p, errno1 = %d\n", f, errno);
-  perror("wurble1");
+  printf("f1 = %p, errno1 = %d (%s)\n", f, errno, strerror(errno));
   pthread_join(tid2, NULL);
   pthread_join(tid3, NULL);
   return 0;
diff --git a/memcheck/tests/threadederrno.stderr.exp b/memcheck/tests/threadederrno.stderr.exp
index 99cd1a3..e69de29 100644
--- a/memcheck/tests/threadederrno.stderr.exp
+++ b/memcheck/tests/threadederrno.stderr.exp
@@ -1,3 +0,0 @@
-wurble1: No such file or directory
-wurble2: No such file or directory
-wurble3: No such file or directory
diff --git a/memcheck/tests/threadederrno.stdout.exp b/memcheck/tests/threadederrno.stdout.exp
index a05dec2..c68b951 100644
--- a/memcheck/tests/threadederrno.stdout.exp
+++ b/memcheck/tests/threadederrno.stdout.exp
@@ -1,3 +1,3 @@
-f1 = (nil), errno1 = 2
-f2 = (nil), errno2 = 2
-f3 = (nil), errno3 = 2
+f1 = (nil), errno1 = 2 (No such file or directory)
+f2 = (nil), errno2 = 2 (No such file or directory)
+f3 = (nil), errno3 = 2 (No such file or directory)
diff --git a/memcheck/tests/zeropage.stderr.exp b/memcheck/tests/zeropage.stderr.exp
index e69de29..d3e3f43 100644
--- a/memcheck/tests/zeropage.stderr.exp
+++ b/memcheck/tests/zeropage.stderr.exp
@@ -0,0 +1,3 @@
+Warning: client syscall mmap2 tried to modify addresses 0x........-0x........
+Warning: client syscall mmap2 tried to modify addresses 0x........-0x........
+Warning: client syscall mmap2 tried to modify addresses 0x........-0x........
diff --git a/none/Makefile.am b/none/Makefile.am
index 8f8ce9d..95ab243 100644
--- a/none/Makefile.am
+++ b/none/Makefile.am
@@ -6,9 +6,14 @@
 		@PREFERRED_STACK_BOUNDARY@ -g
 
 valdir = $(libdir)/valgrind
+inplacedir = $(top_srcdir)/.in_place
 
 val_PROGRAMS = vgskin_none.so
 
 vgskin_none_so_SOURCES 	 = nl_main.c
-vgskin_none_so_LDFLAGS   = -shared
+vgskin_none_so_LDFLAGS   = -shared -Wl,-rpath,$(top_srcdir)/coregrind
 
+all-local:
+	mkdir -p $(inplacedir)
+	-rm -f $(inplacedir)/$(val_PROGRAMS)
+	ln -f -s $(top_srcdir)/$(subdir)/$(val_PROGRAMS) $(inplacedir)/$(val_PROGRAMS)
diff --git a/none/nl_main.c b/none/nl_main.c
index 844c63e..e864467 100644
--- a/none/nl_main.c
+++ b/none/nl_main.c
@@ -30,9 +30,22 @@
 
 #include "vg_skin.h"
 
-VG_DETERMINE_INTERFACE_VERSION
+//float SK_(shadow_ratio) = 9. / 8.;
 
-void SK_(pre_clo_init)(void)
+static void post_clo_init(void)
+{
+}
+
+static UCodeBlock* instrument(UCodeBlock* cb, Addr a)
+{
+    return cb;
+}
+
+static void fini(Int exitcode)
+{
+}
+
+static void pre_clo_init(void)
 {
    VG_(details_name)            ("Nulgrind");
    VG_(details_version)         (NULL);
@@ -42,20 +55,14 @@
    VG_(details_bug_reports_to)  (VG_BUGS_TO);
 
    /* No needs, no core events to track */
+
+   /* entrypoints */
+   VG_(init_post_clo_init)(post_clo_init);
+   VG_(init_instrument)(instrument);
+   VG_(init_fini)(fini);
 }
 
-void SK_(post_clo_init)(void)
-{
-}
-
-UCodeBlock* SK_(instrument)(UCodeBlock* cb, Addr a)
-{
-    return cb;
-}
-
-void SK_(fini)(Int exitcode)
-{
-}
+VG_DETERMINE_INTERFACE_VERSION(pre_clo_init, 0)
 
 /*--------------------------------------------------------------------*/
 /*--- end                                                nl_main.c ---*/
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
index 65221fe..e6f0637 100644
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -45,7 +45,7 @@
 	pth_blockedsig \
 	coolo_sigaction gxx304
 
-AM_CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g
+AM_CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g -I$(top_srcdir)/include
 AM_CXXFLAGS = $(AM_CFLAGS)
 
 # generic C ones
diff --git a/none/tests/smc1.c b/none/tests/smc1.c
index 0b0ebdf..2b28051 100644
--- a/none/tests/smc1.c
+++ b/none/tests/smc1.c
@@ -30,6 +30,7 @@
 */
 
 #include <stdio.h>
+#include "valgrind.h"
 
 typedef unsigned int Addr;
 typedef unsigned char UChar;
@@ -44,7 +45,7 @@
    printf("in p %d\n", n);
 }
 
-UChar code[100];
+UChar code[10];
 
 /* Make `code' be JMP-32 dest */
 void set_dest ( Addr dest )
@@ -58,6 +59,9 @@
    code[2] = ((delta >> 8) & 0xFF);
    code[3] = ((delta >> 16) & 0xFF);
    code[4] = ((delta >> 24) & 0xFF);
+
+   /* XXX this should be automatic */
+   VALGRIND_DISCARD_TRANSLATIONS(code, sizeof(code));
 }
 
 int main ( void )
diff --git a/none/tests/smc1.stdout.exp b/none/tests/smc1.stdout.exp
index d7fc032..cf04673 100644
--- a/none/tests/smc1.stdout.exp
+++ b/none/tests/smc1.stdout.exp
@@ -1,10 +1,10 @@
 in p 0
-in p 1
+in q 1
 in p 2
-in p 3
+in q 3
 in p 4
-in p 5
+in q 5
 in p 6
-in p 7
+in q 7
 in p 8
-in p 9
+in q 9
diff --git a/tests/filter_stderr_basic b/tests/filter_stderr_basic
index 55b2f29..ba07f02 100755
--- a/tests/filter_stderr_basic
+++ b/tests/filter_stderr_basic
@@ -23,6 +23,10 @@
 # Anonymise vg_libpthread lines
 sed "s/vg_libpthread.c:[0-9]\+/vg_libpthread.c:.../"                   |
 
+# Hide suppressed error counts
+sed "s/^\(ERROR SUMMARY[^(]*(suppressed: \)[0-9]\+\( from \)[0-9]\+)$/\10\20)/" |
+
+
 # Reduce some libc incompatibility
 sed "s/ __getsockname / getsockname /"                                 |
 sed "s/ __sigaction / sigaction /"                                     |
diff --git a/tests/vg_regtest.in b/tests/vg_regtest.in
index 2aa689b..84182d8 100755
--- a/tests/vg_regtest.in
+++ b/tests/vg_regtest.in
@@ -226,10 +226,8 @@
 
     # Pass the appropriate --tool option for the directory (can be overridden
     # by an "args:" or "args.dev:" line, though).  
-    # Also, because the default valgrind is coregrind/valgrind which isn't
-    # executable, prepend `sh'.
     my $tool=determine_tool();
-    mysystem("sh $valgrind --tool=$tool --in-place=$tests_dir $vgopts $prog $args > $name.stdout.out 2> $name.stderr.out");
+    mysystem("VALGRINDLIB=$tests_dir/.in_place $valgrind --tool=$tool $vgopts $prog $args > $name.stdout.out 2> $name.stderr.out");
 
     if (defined $stdout_filter) {
         mysystem("$stdout_filter < $name.stdout.out > $tmp");
diff --git a/valgrind.spec.in b/valgrind.spec.in
index 91f2307..f0ccaa3 100644
--- a/valgrind.spec.in
+++ b/valgrind.spec.in
@@ -41,8 +41,10 @@
 /usr/include/valgrind/vg_skin.h
 /usr/bin/valgrind
 /usr/bin/cg_annotate
+/usr/lib/valgrind
 /usr/lib/valgrind/*
 /usr/bin/valgrind-listener
+/usr/lib/pkgconfig/valgrind.pc
 
 %doc
 /usr/share/doc/valgrind/*