Merge the Ptrcheck tool from branches/PTRCHECK r8619.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8620 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/exp-ptrcheck/Makefile.am b/exp-ptrcheck/Makefile.am
new file mode 100644
index 0000000..c66b931
--- /dev/null
+++ b/exp-ptrcheck/Makefile.am
@@ -0,0 +1,129 @@
+include $(top_srcdir)/Makefile.tool.am
+
+noinst_PROGRAMS = 
+if VGP_X86_LINUX
+noinst_PROGRAMS += exp-ptrcheck-x86-linux vgpreload_exp-ptrcheck-x86-linux.so
+endif
+if VGP_AMD64_LINUX
+noinst_PROGRAMS += exp-ptrcheck-amd64-linux vgpreload_exp-ptrcheck-amd64-linux.so
+endif
+if VGP_PPC32_LINUX
+noinst_PROGRAMS += exp-ptrcheck-ppc32-linux vgpreload_exp-ptrcheck-ppc32-linux.so
+endif
+if VGP_PPC64_LINUX
+noinst_PROGRAMS += exp-ptrcheck-ppc64-linux vgpreload_exp-ptrcheck-ppc64-linux.so
+endif
+if VGP_PPC32_AIX5
+noinst_PROGRAMS += exp-ptrcheck-ppc32-aix5 vgpreload_exp-ptrcheck-ppc32-aix5.so
+endif
+if VGP_PPC64_AIX5
+noinst_PROGRAMS += exp-ptrcheck-ppc64-aix5 vgpreload_exp-ptrcheck-ppc64-aix5.so
+endif
+
+
+VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON = h_intercepts.c
+
+vgpreload_exp_ptrcheck_x86_linux_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_x86_linux_so_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
+vgpreload_exp_ptrcheck_x86_linux_so_CFLAGS       = $(AM_CFLAGS_X86_LINUX) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_x86_linux_so_CCASFLAGS    = $(AM_CCASFLAGS_X86_LINUX)
+vgpreload_exp_ptrcheck_x86_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_X86_LINUX)
+vgpreload_exp_ptrcheck_x86_linux_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_X86_LINUX) \
+	$(LIBREPLACEMALLOC_LDFLAGS_X86_LINUX)
+
+vgpreload_exp_ptrcheck_amd64_linux_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_amd64_linux_so_CPPFLAGS     = $(AM_CPPFLAGS_AMD64_LINUX)
+vgpreload_exp_ptrcheck_amd64_linux_so_CFLAGS       = $(AM_CFLAGS_AMD64_LINUX) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_amd64_linux_so_CCASFLAGS    = $(AM_CCASFLAGS_AMD64_LINUX)
+vgpreload_exp_ptrcheck_amd64_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_AMD64_LINUX)
+vgpreload_exp_ptrcheck_amd64_linux_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_AMD64_LINUX) \
+	$(LIBREPLACEMALLOC_LDFLAGS_AMD64_LINUX)
+
+vgpreload_exp_ptrcheck_ppc32_linux_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_ppc32_linux_so_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_LINUX)
+vgpreload_exp_ptrcheck_ppc32_linux_so_CFLAGS       = $(AM_CFLAGS_PPC32_LINUX) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_ppc32_linux_so_CCASFLAGS    = $(AM_CCASFLAGS_PPC32_LINUX)
+vgpreload_exp_ptrcheck_ppc32_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC32_LINUX)
+vgpreload_exp_ptrcheck_ppc32_linux_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_PPC32_LINUX) \
+	$(LIBREPLACEMALLOC_LDFLAGS_PPC32_LINUX)
+
+vgpreload_exp_ptrcheck_ppc64_linux_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_ppc64_linux_so_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_LINUX)
+vgpreload_exp_ptrcheck_ppc64_linux_so_CFLAGS       = $(AM_CFLAGS_PPC64_LINUX) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_ppc64_linux_so_CCASFLAGS    = $(AM_CCASFLAGS_PPC64_LINUX)
+vgpreload_exp_ptrcheck_ppc64_linux_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC64_LINUX)
+vgpreload_exp_ptrcheck_ppc64_linux_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_PPC64_LINUX) \
+	$(LIBREPLACEMALLOC_LDFLAGS_PPC64_LINUX)
+
+vgpreload_exp_ptrcheck_ppc32_aix5_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_ppc32_aix5_so_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_AIX5)
+vgpreload_exp_ptrcheck_ppc32_aix5_so_CFLAGS       = $(AM_CFLAGS_PPC32_AIX5) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_ppc32_aix5_so_CCASFLAGS    = $(AM_CCASFLAGS_PPC32_AIX5)
+vgpreload_exp_ptrcheck_ppc32_aix5_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC32_AIX5)
+vgpreload_exp_ptrcheck_ppc32_aix5_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_PPC32_AIX5) \
+	$(LIBREPLACEMALLOC_LDFLAGS_PPC32_AIX5)
+
+vgpreload_exp_ptrcheck_ppc64_aix5_so_SOURCES      = $(VGPRELOAD_EXP_PTRCHECK_SOURCES_COMMON)
+vgpreload_exp_ptrcheck_ppc64_aix5_so_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_AIX5)
+vgpreload_exp_ptrcheck_ppc64_aix5_so_CFLAGS       = $(AM_CFLAGS_PPC64_AIX5) $(AM_CFLAGS_PIC) -O2
+vgpreload_exp_ptrcheck_ppc64_aix5_so_CCASFLAGS    = $(AM_CCASFLAGS_PPC64_AIX5)
+vgpreload_exp_ptrcheck_ppc64_aix5_so_DEPENDENCIES = $(LIBREPLACEMALLOC_PPC64_AIX5)
+vgpreload_exp_ptrcheck_ppc64_aix5_so_LDFLAGS      = \
+	$(PRELOAD_LDFLAGS_PPC64_AIX5) \
+	$(LIBREPLACEMALLOC_LDFLAGS_PPC64_AIX5)
+
+
+
+EXP_PTRCHECK_SOURCES_COMMON = \
+	pc_common.c h_main.c sg_main.c pc_main.c
+
+exp_ptrcheck_x86_linux_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_x86_linux_CPPFLAGS     = $(AM_CPPFLAGS_X86_LINUX)
+exp_ptrcheck_x86_linux_CFLAGS       = $(AM_CFLAGS_X86_LINUX)
+exp_ptrcheck_x86_linux_DEPENDENCIES = $(COREGRIND_LIBS_X86_LINUX)
+exp_ptrcheck_x86_linux_LDADD        = $(TOOL_LDADD_X86_LINUX)
+exp_ptrcheck_x86_linux_LDFLAGS      = $(TOOL_LDFLAGS_X86_LINUX)
+
+exp_ptrcheck_amd64_linux_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_amd64_linux_CPPFLAGS     = $(AM_CPPFLAGS_AMD64_LINUX)
+exp_ptrcheck_amd64_linux_CFLAGS       = $(AM_CFLAGS_AMD64_LINUX)
+exp_ptrcheck_amd64_linux_DEPENDENCIES = $(COREGRIND_LIBS_AMD64_LINUX)
+exp_ptrcheck_amd64_linux_LDADD        = $(TOOL_LDADD_AMD64_LINUX)
+exp_ptrcheck_amd64_linux_LDFLAGS      = $(TOOL_LDFLAGS_AMD64_LINUX)
+
+exp_ptrcheck_ppc32_linux_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_ppc32_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_LINUX)
+exp_ptrcheck_ppc32_linux_CFLAGS       = $(AM_CFLAGS_PPC32_LINUX)
+exp_ptrcheck_ppc32_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_LINUX)
+exp_ptrcheck_ppc32_linux_LDADD        = $(TOOL_LDADD_PPC32_LINUX)
+exp_ptrcheck_ppc32_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_LINUX)
+
+exp_ptrcheck_ppc64_linux_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_ppc64_linux_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_LINUX)
+exp_ptrcheck_ppc64_linux_CFLAGS       = $(AM_CFLAGS_PPC64_LINUX)
+exp_ptrcheck_ppc64_linux_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_LINUX)
+exp_ptrcheck_ppc64_linux_LDADD        = $(TOOL_LDADD_PPC64_LINUX)
+exp_ptrcheck_ppc64_linux_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_LINUX)
+
+exp_ptrcheck_ppc32_aix5_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_ppc32_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC32_AIX5)
+exp_ptrcheck_ppc32_aix5_CFLAGS       = $(AM_CFLAGS_PPC32_AIX5)
+exp_ptrcheck_ppc32_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC32_AIX5)
+exp_ptrcheck_ppc32_aix5_LDADD        = $(TOOL_LDADD_PPC32_AIX5)
+exp_ptrcheck_ppc32_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC32_AIX5)
+
+exp_ptrcheck_ppc64_aix5_SOURCES      = $(EXP_PTRCHECK_SOURCES_COMMON)
+exp_ptrcheck_ppc64_aix5_CPPFLAGS     = $(AM_CPPFLAGS_PPC64_AIX5)
+exp_ptrcheck_ppc64_aix5_CFLAGS       = $(AM_CFLAGS_PPC64_AIX5)
+exp_ptrcheck_ppc64_aix5_DEPENDENCIES = $(COREGRIND_LIBS_PPC64_AIX5)
+exp_ptrcheck_ppc64_aix5_LDADD        = $(TOOL_LDADD_PPC64_AIX5)
+exp_ptrcheck_ppc64_aix5_LDFLAGS      = $(TOOL_LDFLAGS_PPC64_AIX5)
+
+noinst_HEADERS = h_main.h sg_main.h pc_common.h
+
+EXTRA_DIST = README.ABOUT.PTRCHECK.txt
diff --git a/exp-ptrcheck/README.ABOUT.PTRCHECK.txt b/exp-ptrcheck/README.ABOUT.PTRCHECK.txt
new file mode 100644
index 0000000..c528098
--- /dev/null
+++ b/exp-ptrcheck/README.ABOUT.PTRCHECK.txt
@@ -0,0 +1,370 @@
+
+0. CONTENTS
+~~~~~~~~~~~
+
+This document introduces Ptrcheck, a new, experimental Valgrind tool.
+It contains the following sections:
+
+   1. INTRODUCING PTRCHECK
+   2. HOW TO RUN IT
+   3. HOW IT WORKS: HEAP CHECKING
+   4. HOW IT WORKS: STACK & GLOBAL CHECKING
+   5. COMPARISON WITH MEMCHECK
+   6. LIMITATIONS
+   7. STILL TO DO -- User visible things
+   8. STILL TO DO -- Implementation tidying
+
+
+
+1. INTRODUCING PTRCHECK
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Ptrcheck is a Valgrind tool for finding overruns of heap, stack and
+global arrays.  Its functionality overlaps somewhat with Memcheck's,
+but it is able to catch invalid accesses in a number of cases that
+Memcheck would miss.  A detailed comparison against Memcheck is
+presented below.
+
+Ptrcheck is composed of two almost completely independent tools that
+have been glued together.  One part, in h_main.[ch], checks accesses
+through heap-derived pointers.  The other part, in sg_main.[ch],
+checks accesses to stack and global arrays.  The remaining files
+pc_{common,main}.[ch], provide common error-management and
+coordination functions, so as to make it appear as a single tool.
+
+The heap-check part is an extensively-hacked (largely rewritten)
+version of the experimental "Annelid" tool developed and described by
+Nicholas Nethercote and Jeremy Fitzhardinge.  The stack- and global-
+check part uses a heuristic approach derived from an observation about
+the likely forms of stack and global array accesses, and, as far as is
+known, is entirely novel.
+
+
+
+2. HOW TO RUN IT
+~~~~~~~~~~~~~~~~
+
+valgrind --tool=exp-ptrcheck [myprog] [args for myprog]
+
+There are no Ptrcheck specific flags at present.
+
+
+
+3. HOW IT WORKS: HEAP CHECKING
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ptrcheck can check for invalid uses of heap pointers, including out of
+range accesses and accesses to freed memory.  The mechanism is however
+completely different from Memcheck's, and the checking is more
+powerful.
+
+For each pointer in the program, Ptrcheck keeps track of which heap
+block (if any) it was derived from.  Then, when an access is made
+through that pointer, Ptrcheck compares the access address with the
+bounds of the associated block, and reports an error if the address is
+out of bounds, or if the block has been freed.
+
+Of course it is rarely the case that one wants to access a block only
+at the exact address returned by malloc (et al).  Ptrcheck understands
+that adding or subtracting offsets from a pointer to a block results
+in a pointer to the same block.
+
+At a fundamental level, this scheme works because a correct program
+cannot make assumptions about the addresses returned by malloc.  In
+particular it cannot make any assumptions about the differences in
+addresses returned by subsequent calls to malloc.  Hence there are
+very few ways to take an address returned by malloc, modify it, and
+still have a valid address.  In short, the only allowable operations
+are adding and subtracting other non-pointer values.  Almost all other
+operations produce a value which cannot possibly be a valid pointer.
+
+
+
+4. HOW IT WORKS: STACK & GLOBAL CHECKING
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a source file is compiled with "-g", the compiler attaches Dwarf3
+debugging information which describes the location of all stack and
+global arrays in the file.
+
+Checking of accesses to such arrays would then be relatively simple,
+if the compiler could also tell us which array (if any) each memory
+referencing instruction was supposed to access.  Unfortunately the
+Dwarf3 debugging format does not provide a way to represent such
+information, so we have to resort to a heuristic technique to
+approximate the same information.  The key observation is that
+
+   if a memory referencing instruction accesses inside a stack or
+   global array once, then it is highly likely to always access that
+   same array
+
+To see how this might be useful, consider the following buggy
+fragment:
+
+   { int i, a[10];  // both are auto vars
+     for (i = 0; i <= 10; i++)
+        a[i] = 42;
+   }
+
+At run time we will know the precise address of a[] on the stack, and
+so we can observe that the first store resulting from "a[i] = 42"
+writes a[], and we will (correctly) assume that that instruction is
+intended always to access a[].  Then, on the 11th iteration, it
+accesses somewhere else, possibly a different local, possibly an
+un-accounted for area of the stack (eg, spill slot), so Ptrcheck
+reports an error.
+
+There is an important caveat.
+
+Imagine a function such as memcpy, which is used to read and write
+many different areas of memory over the lifetime of the program.  If
+we insist that the read and write instructions in its memory copying
+loop only ever access one particular stack or global variable, we will
+be flooded with errors resulting from calls to memcpy.
+
+To avoid this problem, Ptrcheck instantiates fresh likely-target
+records for each entry to a function, and discards them on exit.  This
+allows detection of cases where (eg) memcpy overflows its source or
+destination buffers for any specific call, but does not carry any
+restriction from one call to the next.  Indeed, multiple threads may
+make multiple simultaneous calls to (eg) memcpy without mutual
+interference.
+
+
+
+5. COMPARISON WITH MEMCHECK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Memcheck does not do any access checks for stack or global arrays, so
+the presence of those in Ptrcheck is a straight win.  (But see
+LIMITATIONS below).
+
+Memcheck and Ptrcheck use different approaches for checking heap
+accesses.  Memcheck maintains bitmaps telling it which areas of memory
+are accessible and which are not.  If a memory access falls in an
+inaccessible area, it reports an error.  By marking the 16 bytes
+before and after an allocated block inaccessible, Memcheck is able to
+detect small over- and underruns of the block.  Similarly, by marking
+freed memory as inaccessible, Memcheck can detect all accesses to
+freed memory.
+
+Memcheck's approach is simple.  But it's also weak.  It can't catch
+block overruns beyond 16 bytes.  And, more generally, because it
+focusses only on the question "is the target address accessible", it
+fails to detect invalid accesses which just happen to fall within some
+other valid area.  This is not improbable, especially in crowded areas
+of the process' address space.
+
+Ptrcheck's approach is to keep track of pointers derived from heap
+blocks.  It tracks pointers which are derived directly from calls to
+malloc et al, but also ones derived indirectly, by adding or
+subtracting offsets from the directly-derived pointers.  When a
+pointer is finally used to access memory, Ptrcheck compares the access
+address with that of the block it was originally derived from, and
+reports an error if the access address is not within the block bounds.
+
+Consequently Ptrcheck can detect any out of bounds access through a
+heap-derived pointer, no matter how far from the original block it is.
+
+A second advantage is that Ptrcheck is better at detecting accesses to
+blocks freed very far in the past.  Memcheck can detect these too, but
+only for blocks freed relatively recently.  To detect accesses to a
+freed block, Memcheck must make it inaccessible, hence requiring a
+space overhead proportional to the size of the block.  If the blocks
+are large, Memcheck will have to make them available for re-allocation
+relatively quickly, thereby losing the ability to detect invalid
+accesses to them.
+
+By contrast, Ptrcheck has a constant per-block space requirement of
+four machine words, for detection of accesses to freed blocks.  A
+freed block can be reallocated immediately, yet Ptrcheck can still
+detect all invalid accesses through any pointers derived from the old
+allocation, providing only that the four-word descriptor for the old
+allocation is stored.  For example, on a 64-bit machine, to detect
+accesses in any of the most recently freed 10 million blocks, Ptrcheck
+will require only 320MB of extra storage.  Achieving the same level
+of detection with Memcheck is close to impossible and would likely
+involve several gigabytes of extra storage.
+
+In defense of Memcheck ...
+
+Remember that Memcheck performs uninitialised value checking, which
+Ptrcheck does not.  Memcheck has also benefitted from years of
+refinement, tuning, and experience with production-level usage, and so
+is much faster than Ptrcheck as it currently stands, as of September
+2008.
+
+Consequently it is recommended to first make your programs run
+Memcheck clean.  Once that's done, try Ptrcheck to see if you can
+shake out any further heap, global or stack errors.
+
+
+
+6. LIMITATIONS
+~~~~~~~~~~~~~~
+
+This is an experimental tool, which relies rather too heavily on some
+not-as-robust-as-I-would-like assumptions on the behaviour of correct
+programs.  There are a number of limitations which you should be aware
+of.
+
+* Heap checks: Ptrcheck can occasionally lose track of, or become
+  confused about, which heap block a given pointer has been derived
+  from.  This can cause it to falsely report errors, or to miss some
+  errors.  This is not believed to be a serious problem.
+
+* Heap checks: Ptrcheck only tracks pointers that are stored properly
+  aligned in memory.  If a pointer is stored at a misaligned address,
+  and then later read again, Ptrcheck will lose track of what it
+  points at.  Similar problem if a pointer is split into pieces and
+  later reconstituted.
+
+* Heap checks: Ptrcheck needs to "understand" which system calls
+  return pointers and which don't.  Many, but not all system calls are
+  handled.  If an unhandled one is encountered, Ptrcheck will abort.
+
+* Stack checks: It follows from the description above (HOW IT WORKS:
+  STACK & GLOBAL CHECKING) that the first access by a memory
+  referencing instruction to a stack or global array creates an
+  association between that instruction and the array, which is checked
+  on subsequent accesses by that instruction, until the containing
+  function exits.  Hence, the first access by an instruction to an
+  array (in any given function instantiation) is not checked for
+  overrun, since Ptrcheck uses that as the "example" of how subsequent
+  accesses should behave.
+
+* Stack checks: Similarly, and more serious, it is clearly possible to
+  write legitimate pieces of code which break the basic assumption
+  upon which the stack/global checking rests.  For example:
+
+  { int a[10], b[10], *p, i;
+    for (i = 0; i < 10; i++) {
+       p = /* arbitrary condition */  ? &a[i]  : &b[i];
+       *p = 42;
+    }
+  }
+
+  In this case the store sometimes accesses a[] and sometimes b[], but
+  in no cases is the addressed array overrun.  Nevertheless the change
+  in target will cause an error to be reported.
+
+  It is hard to see how to get around this problem.  The only
+  mitigating factor is that such constructions appear very rare, at
+  least judging from the results using the tool so far.  Such a
+  construction appears only once in the Valgrind sources (running
+  Valgrind on Valgrind) and perhaps two or three times for a start and
+  exit of Firefox.  The best that can be done is to suppress the
+  errors.
+
+* Performance: the stack/global checks require reading all of the
+  Dwarf3 type and variable information on the executable and its
+  shared objects.  This is computationally expensive and makes startup
+  quite slow.  You can expect debuginfo reading time to be in the
+  region of a minute for an OpenOffice sized application, on a 2.4 GHz
+  Core 2 machine.  Reading this information also requires a lot of
+  memory.  To make it viable, Ptrcheck goes to considerable trouble to
+  compress the in-memory representation of the Dwarf3 data, which is
+  why the process of reading it appears slow.
+
+* Performance: Ptrcheck runs slower than Memcheck.  This is partly due
+  to a lack of tuning, but partly due to algorithmic difficulties.
+  The heap-check side is potentially quite fast.  The stack and global
+  checks can sometimes require a number of range checks per memory
+  access, and these are difficult to short-circuit (despite
+  considerable efforts having been made).
+
+* Coverage: the heap checking is relatively robust, requiring only
+  that Ptrcheck can see calls to malloc/free et al.  In that sense it
+  has debug-info requirements comparable with Memcheck, and is able to
+  heap-check programs even with no debugging information attached.
+
+  Stack/global checking is much more fragile.  If a shared object does
+  not have debug information attached, then Ptrcheck will not be able
+  to determine the bounds of any stack or global arrays defined within
+  that shared object, and so will not be able to check accesses to
+  them.  This is true even when those arrays are accessed from some
+  other shared object which was compiled with debug info.
+
+  At the moment Ptrcheck accepts objects lacking debuginfo without
+  comment.  This is dangerous as it causes Ptrcheck to silently skip
+  stack & global checking for such objects.  It would be better to
+  print a warning in such circumstances.
+
+* Coverage: Ptrcheck checks that the areas read or written by system
+  calls do not overrun heap blocks.  But it doesn't currently check
+  them for overruns of stack and global arrays.  This would be easy to
+  add.
+
+* Performance: for implementation reasons, system call checking has a
+  cost proportional to the number of live and freed heap blocks being
+  tracked, and so can be very expensive.  This is stupid and could
+  easily be fixed (see "STILL TO DO -- User visible things" below).
+
+* Platforms: the stack/global checks won't work properly on any
+  PowerPC platforms, only on x86 and amd64 targets.  That's because
+  the stack and global checking requires tracking function calls and
+  exits reliably, and there's no obvious way to do it with the PPC
+  ABIs.  (cf with the x86 and amd64 ABIs this is relatively
+  straightforward.)
+
+* Robustness: related to the previous point.  Function call/exit
+  tracking for x86/amd64 is believed to work properly even in the
+  presence of longjmps within the same stack (although this has not
+  been tested).  However, code which switches stacks is likely to
+  cause breakage/chaos.
+
+
+7. STILL TO DO -- User visible things
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Extend system call checking to work on stack and global arrays
+
+* Fix big performance problem to do with heap-vs-syscall checking.
+  How: in h_main.c: get rid of get_Seg_containing_addr_SLOW and
+  implement the same by doing a search in addr_to_seg_map.  This would
+  fix the heap-vs-syscall performance problem noted above.
+
+* Print a warning if a shared object does not have debug info attached
+
+
+
+8. STILL TO DO -- Implementation tidying
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Items marked CRITICAL are considered important for correctness:
+non-fixage of them is liable to lead to crashes or assertion failures
+in real use.
+
+* h_main.c: make N_FREED_SEGS command-line configurable
+
+* Maybe add command line options to enable only heap checking, or only
+  stack/global checking
+
+* sg_main.c: Improve the performance of the stack / global checks by
+  doing some up-front filtering to ignore references in areas which
+  "obviously" can't be stack or globals.  This will require
+  using information that m_aspacemgr knows about the address space
+  layout.
+
+* h_main.c: get rid of the last_seg_added hack; add suitable plumbing
+  to the core/tool interface to do this cleanly
+
+* h_main.c: move vast amounts of arch-dependent uglyness
+  (get_IntRegInfo et al) to its own source file, a la mc_machine.c.
+
+* h_main.c: make the lossage-check stuff work again, as a way of doing
+  quality assurance on the implementation
+
+* h_main.c: schemeEw_Atom: don't generate a call to nonptr_or_unknown,
+  this is really stupid, since it could be done at translation time
+  instead
+
+* CRITICAL: h_main.c: h_instrument (main instrumentation fn): generate
+  shadows for word-sized temps defined in the block's preamble.  (Why
+  does this work at all, as it stands?)
+
+* CRITICAL: sg_main.c: make preen_Invar work properly again.  Why
+  isn't it being called?
+
+* sg_main.c: fix compute_II_hash to make it a bit more sensible
+  for ppc32/64 targets
diff --git a/exp-ptrcheck/h_intercepts.c b/exp-ptrcheck/h_intercepts.c
new file mode 100644
index 0000000..8b95cb5
--- /dev/null
+++ b/exp-ptrcheck/h_intercepts.c
@@ -0,0 +1,163 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.             pc_intercepts.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2003-2008 Nicholas Nethercote
+      njn@valgrind.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Nothing actually in here.  However it appears this file is needed
+   to make malloc intercepting work. (jrs, 2 july 08 -- not sure about
+   that).
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_hashtable.h"
+#include "pub_tool_redir.h"
+#include "pub_tool_tooliface.h"
+#include "valgrind.h"
+
+
+/* The following intercepts are copied verbatim from
+   memcheck/mc_replace_strmem.c. */
+
+/* --------- Some handy Z-encoded names. --------- */
+
+/* --- Soname of the standard C library. --- */
+
+#if defined(VGO_linux)
+#  define  m_libc_soname     libcZdsoZa              // libc.so*
+#elif defined(VGP_ppc32_aix5)
+   /* AIX has both /usr/lib/libc.a and /usr/lib/libc_r.a. */
+#  define  m_libc_soname     libcZaZdaZLshrZdoZR     // libc*.a(shr.o)
+#elif defined(VGP_ppc64_aix5)
+#  define  m_libc_soname     libcZaZdaZLshrZu64ZdoZR // libc*.a(shr_64.o)
+#else
+#  error "Unknown platform"
+#endif
+
+/* --- Sonames for Linux ELF linkers. --- */
+
+#define  m_ld_linux_so_2         ldZhlinuxZdsoZd2           // ld-linux.so.2
+#define  m_ld_linux_x86_64_so_2  ldZhlinuxZhx86Zh64ZdsoZd2  // ld-linux-x86-64.so.2
+#define  m_ld64_so_1             ld64ZdsoZd1                // ld64.so.1
+#define  m_ld_so_1               ldZdsoZd1                  // ld.so.1
+
+
+
+
+#define STRCMP(soname, fnname) \
+   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
+          ( const char* s1, const char* s2 ); \
+   int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
+          ( const char* s1, const char* s2 ) \
+   { \
+      register unsigned char c1; \
+      register unsigned char c2; \
+      while (True) { \
+         c1 = *(unsigned char *)s1; \
+         c2 = *(unsigned char *)s2; \
+         if (c1 != c2) break; \
+         if (c1 == 0) break; \
+         s1++; s2++; \
+      } \
+      if ((unsigned char)c1 < (unsigned char)c2) return -1; \
+      if ((unsigned char)c1 > (unsigned char)c2) return 1; \
+      return 0; \
+   }
+
+STRCMP(m_libc_soname,          strcmp)
+STRCMP(m_ld_linux_x86_64_so_2, strcmp)
+STRCMP(m_ld64_so_1,            strcmp)
+
+
+// Note that this replacement often doesn't get used because gcc inlines
+// calls to strlen() with its own built-in version.  This can be very
+// confusing if you aren't expecting it.  Other small functions in this file
+// may also be inline by gcc.
+#define STRLEN(soname, fnname) \
+   SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ); \
+   SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ) \
+   { \
+      SizeT i = 0; \
+      while (str[i] != 0) i++; \
+      return i; \
+   }
+
+STRLEN(m_libc_soname,          strlen)
+STRLEN(m_ld_linux_so_2,        strlen)
+STRLEN(m_ld_linux_x86_64_so_2, strlen)
+
+
+#define MEMCPY(soname, fnname) \
+   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
+            ( void *dst, const void *src, SizeT sz ); \
+   void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
+            ( void *dest, const void *src, SizeT sz ) \
+   { \
+   const UChar*  s  = (const UChar*)src; \
+         UChar*  d  =       (UChar*)dest; \
+   const UWord*  sW = (const UWord*)src; \
+         UWord*  dW =       (UWord*)dest; \
+   const UWord   al = sizeof(UWord)-1; \
+   \
+   if (0 == (((UWord)dW) & al) && 0 == (((UWord)sW) & al)) { \
+      while (sz >= 4 * sizeof(UWord)) { \
+         dW[0] = sW[0]; \
+         dW[1] = sW[1]; \
+         dW[2] = sW[2]; \
+         dW[3] = sW[3]; \
+         sz -= 4 * sizeof(UWord); \
+         dW += 4; \
+         sW += 4; \
+      } \
+      if (sz == 0) \
+         return dest; \
+      while (sz >= 1 * sizeof(UWord)) { \
+         dW[0] = sW[0]; \
+         sz -= 1 * sizeof(UWord); \
+         dW += 1; \
+         sW += 1; \
+      } \
+      if (sz == 0) \
+         return dest; \
+      s = (const UChar*)sW; \
+      d = (UChar*)dW; \
+   } \
+   \
+   while (sz--) \
+      *d++ = *s++; \
+   \
+   return dest; \
+   }
+
+MEMCPY(m_libc_soname, memcpy)
+MEMCPY(m_ld_so_1,     memcpy) /* ld.so.1 */
+MEMCPY(m_ld64_so_1,   memcpy) /* ld64.so.1 */
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                          pc_intercepts.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/h_main.c b/exp-ptrcheck/h_main.c
new file mode 100644
index 0000000..fe01712
--- /dev/null
+++ b/exp-ptrcheck/h_main.c
@@ -0,0 +1,4669 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- This file checks heap accesses.                              ---*/
+/*---                                                     h_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Initial version (Annelid):
+
+   Copyright (C) 2003-2008 Nicholas Nethercote
+      njn@valgrind.org
+
+   Valgrind-3.X port:
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+// FIXME: 64-bit cleanness, check the following
+// struct _ISNode.ownerCount is 32-bit
+// struct _ISNode.topLevel is 32-bit
+// or is that not really right now?  add assertion checks about
+// the max size of a node
+
+// FIXME: should we shadow %RIP?  Maybe not.
+
+// FIXME: shadows of temporaries created in preamble, a la memcheck?
+
+// FIXME: result of add_new_segment is always ignored
+
+// FIXME: the mechanism involving last_seg_added is really ugly.
+// Do something cleaner.
+
+// FIXME: post_reg_write_clientcall: check function pointer comparisons
+// are safe on toc-afflicted platforms
+
+// FIXME: tidy up findShadowTmp
+
+// FIXME: post_reg_write_demux(Vg_CoreSysCall) is redundant w.r.t.
+// the default 'NONPTR' behaviour of post_syscall.  post_reg_write_demux
+// is called first, then post_syscall.
+
+// FIXME: check nothing is mapped in the lowest 1M of memory at
+// startup, or quit (to do with nonptr_or_unknown, also sync 1M
+// magic value with PIE default load address in m_ume.c.
+
+// FIXME: consider whether we could paint memory acquired from
+// sys_read etc as NONPTR rather than UNKNOWN.
+
+// XXX: recycle freed segments
+
+//--------------------------------------------------------------
+// Metadata:
+//   HeapBlock.id :: Seg (stored as heap shadowchunk; always non-zero)
+//   MemLoc.aseg  :: Seg (implicitly stored)
+//   MemLoc.vseg  :: Seg (explicitly stored as the shadow memory)
+//   RegLoc.vseg  :: Seg (explicitly stored as shadow registers)
+//
+// A Seg is made when new memory is created, eg. with malloc() or mmap().
+// There are three other special Segs:
+//  - NONPTR:  for something that's definitely not a pointer
+//  - UNKNOWN: for something that could be a pointer
+//  - BOTTOM:  used with pointer differences (see below)
+//
+// MemLoc.vseg is done at word granularity.  If a pointer is written
+// to memory misaligned, the information about it will be lost -- it's
+// treated as two sub-word writes to two adjacent words.  This avoids
+// certain nasty cases that could arise if we tried to track unaligned
+// pointers.  Fortunately, misalignment is rare so we don't lose much
+// information this way.
+//
+// MemLoc.aseg is done at byte granularity, and *implicitly* -- ie. not
+// directly accessible like MemLoc.vseg, but only by searching through all
+// the segments.  Fortunately, it's mostly checked at LOADs/STOREs;  at that
+// point we have a pointer p to the MemLoc m as the other arg of the
+// LOAD/STORE, so we can check to see if the p.vseg's range includes m.  If
+// not, it's an error and we have to search through all segments to find out
+// what m.aseg really is.  That's still pretty fast though, thanks to the
+// interval skip-list used.  With syscalls we must also do the skip-list
+// search, but only on the first and last bytes touched.
+//--------------------------------------------------------------
+
+//--------------------------------------------------------------
+// Assumptions, etc:
+// - see comment at top of SK_(instrument)() for how sub-word ops are
+//   handled.
+//
+// - ioctl(), socketcall() (and ipc() will be) assumed to return non-pointers
+//
+// - FPU_W is assumed to never write pointers.
+//
+// - Assuming none of the post_mem_writes create segments worth tracking.
+//
+// - Treating mmap'd segments (all! including code) like heap segments.  But
+//   their ranges can change, new ones can be created by unmapping parts of
+//   old segments, etc.  But this nasty behaviour seems to never happen -- 
+//   there are assertions checking it.
+//--------------------------------------------------------------
+
+//--------------------------------------------------------------
+// What I am checking:
+// - Type errors:
+//    * ADD, OR, LEA2: error if two pointer inputs.
+//    * ADC, SBB: error if one or two pointer inputs.
+//    * AND, OR: error if two unequal pointer inputs.
+//    * NEG: error if pointer input.
+//    * {,i}mul_32_64 if either input is a pointer.
+//    * shldl/shrdl, bsf/bsr if any inputs are pointers.
+//
+// - LOAD, STORE:
+//    * ptr.vseg must match ptee.aseg.
+//    * ptee.aseg must not be a freed segment.
+//
+// - syscalls: for those accessing memory, look at first and last bytes:
+//    * check first.aseg == last.aseg
+//    * check first.aseg and last.aseg are not freed segments.
+//
+// What I am not checking, that I expected to when I started:
+// - AND, XOR: allowing two pointers to be used if both from the same segment,
+//   because "xor %r,%r" is commonly used to zero %r, and "test %r,%r"
+//   (which is translated with an AND) is common too.
+//
+// - div_64_32/idiv_64_32 can take pointer inputs for the dividend;
+//   division doesn't make sense, but modulo does, and they're done with the
+//   same instruction.  (Could try to be super-clever and watch the outputs
+//   to see if the quotient is used, but not worth it.)
+//
+// - mul_64_32/imul_64_32 can take pointers inputs for one arg or the
+//   other, but not both.  This is because some programs (eg. Mozilla
+//   Firebird) multiply pointers in hash routines.
+//
+// - NEG: can take a pointer.  It happens in glibc in a few places.  I've
+//   seen the code, didn't understand it, but it's done deliberately.
+//
+// What I am not checking/doing, but could, but it would require more
+// instrumentation and/or slow things down a bit:
+// - SUB: when differencing two pointers, result is BOTTOM, ie. "don't
+//   check".  Could link segments instead, slower but a bit more accurate.
+//   Also use BOTTOM when doing (ptr - unknown), which could be a pointer
+//   difference with a stack/static pointer.
+//
+// - PUTF: input should be non-pointer
+//
+// - arithmetic error messages: eg. for adding two pointers, just giving the
+//   segments, not the actual pointers.
+//
+// What I am not checking, and would be difficult:
+// - mmap(...MAP_FIXED...) is not handled specially.  It might be used in
+//   ways that fool Ptrcheck into giving false positives.
+//
+// - syscalls: for those accessing memory, not checking that the asegs of the
+//   accessed words match the vseg of the accessing pointer, because the
+//   vseg is not easily accessible at the required time (would required
+//   knowing for every syscall which register each arg came in, and looking
+//   there).
+//
+// What I am not checking, and would be difficult, but doesn't matter:
+// - free(p): similar to syscalls, not checking that the p.vseg matches the
+//   aseg of the first byte in the block.  However, Memcheck does an
+//   equivalent "bad free" check using shadow_chunks;  indeed, Ptrcheck could
+//   do the same check, but there's no point duplicating functionality.  So
+//   no loss, really.
+//
+// Other:
+// - not doing anything with mprotect();  probably not worth the effort.
+//--------------------------------------------------------------
+
+//--------------------------------------------------------------
+// Todo:
+// - Segments for stack frames.  Would detect (some, large) stack
+//   over/under-runs, dangling pointers.
+//
+// - Segments for static data.  Would detect over/under-runs.  Requires
+//   reading debug info.
+//--------------------------------------------------------------
+
+//--------------------------------------------------------------
+// Some profiling results:
+//                                                 twolf   konq    date sz
+// 1. started                                              35.0s   14.7
+// 2. introduced GETV/PUTV                                 30.2s   10.1
+// 3. inlined check_load_or_store                  5.6s    27.5s   10.1
+// 4. (made check_load, check_store4 regparm(0))          (27.9s) (11.0)
+// 5. um, not sure                                 5.3s    27.3s   10.6
+//    ...
+// 6. after big changes, corrections              11.2s    32.8s   14.0
+// 7. removed link-segment chasing in check/L/S    8.9s    30.8s   14.0
+// 8. avoiding do_lea1 if k is a nonptr            8.0s    28.0s   12.9
+//--------------------------------------------------------------
+
+//#include "vg_skin.h"
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_hashtable.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_replacemalloc.h"
+#include "pub_tool_options.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_aspacemgr.h"    // VG_(am_shadow_malloc)
+#include "pub_tool_vki.h"          // VKI_MAX_PAGE_SIZE
+#include "pub_tool_machine.h"      // VG_({get,set}_shadow_regs_area) et al
+#include "pub_tool_debuginfo.h"    // VG_(get_fnname)
+#include "pub_tool_threadstate.h"  // VG_(get_running_tid)
+#include "pub_tool_oset.h"
+#include "pub_tool_vkiscnums.h"
+#include "pub_tool_machine.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_xarray.h"
+
+#include "pc_common.h"
+
+//#include "h_list.h"
+#include "h_main.h"
+
+#include "sg_main.h"   // sg_instrument_*, and struct _SGEnv
+
+
+
+/*------------------------------------------------------------*/
+/*--- Debug/trace options                                  ---*/
+/*------------------------------------------------------------*/
+
+/* Set to 1 to do sanity checks on Seg values in many places, which
+   checks if bogus Segs are in circulation.  Quite expensive from a
+   performance point of view. */
+#define SC_SEGS 0
+
+static ULong stats__client_mallocs = 0;
+static ULong stats__client_frees   = 0;
+static ULong stats__segs_allocd    = 0;
+static ULong stats__segs_recycled  = 0;
+static ULong stats__slow_searches  = 0;
+static ULong stats__slow_totcmps   = 0;
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Segments low level storage                               //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+// NONPTR, UNKNOWN, BOTTOM defined in h_main.h since 
+// pc_common.c needs to see them, for error processing
+
+// we only start recycling segs when this many exist
+#define N_FREED_SEGS (1 * 1000 * 1000)
+
+/* A tracked segment (heap block).  Segs are carved out of SegGroups
+   (below) and never returned to the OS; freed ones are queued for
+   recycling. */
+struct _Seg {
+   Addr  addr;
+   SizeT szB; /* may be zero */
+   ExeContext* ec;  /* where malloc'd or freed */
+   /* When (Seg*)1, indicates block is in use.  Otherwise, used to form
+      a linked list of freed blocks, running from oldest freed block to
+      the most recently freed block. */
+   struct _Seg* nextfree;
+};
+
+// Determines if 'a' is before, within, or after seg's range.  Sets 'cmp' to
+// -1/0/1 accordingly.  Sets 'n' to the number of bytes before/within/after.
+/* Classify 'a' against seg's range: *cmp gets -1 (before), 0 (within)
+   or 1 (after), and *n the distance in bytes from the relevant edge.
+   A zero-sized seg contains no addresses at all. */
+void Seg__cmp(Seg* seg, Addr a, Int* cmp, UWord* n)
+{
+   Addr lo = seg->addr;
+   Addr hi = seg->addr + seg->szB;
+   if (a < lo) {
+      *cmp = -1;
+      *n   = lo - a;
+   } else if (seg->szB > 0 && a < hi) {
+      *cmp = 0;
+      *n   = a - lo;
+   } else {
+      *cmp = 1;
+      *n   = a - hi;
+   }
+}
+
+/* True iff 'seg' is a real segment that has been freed.  An in-use Seg
+   has nextfree == (Seg*)1; anything else means it is on the freed
+   queue.  The NONPTR/UNKNOWN/BOTTOM pseudo-segments are never freed. */
+inline Bool Seg__is_freed(Seg* seg)
+{
+   if (!is_known_segment(seg))
+      return False;
+   else
+      return seg->nextfree != (Seg*)1;
+}
+
+/* Where the segment was malloc'd -- or, once freed, where it was
+   freed (see die_and_free_mem_heap, which overwrites 'ec'). */
+ExeContext* Seg__where(Seg* seg)
+{
+   tl_assert(is_known_segment(seg));
+   return seg->ec;
+}
+
+/* Size of the segment in bytes; may be zero. */
+SizeT Seg__size(Seg* seg)
+{
+   tl_assert(is_known_segment(seg));
+   return seg->szB;
+}
+
+/* Start address of the segment. */
+Addr Seg__addr(Seg* seg)
+{
+   tl_assert(is_known_segment(seg));
+   return seg->addr;
+}
+
+
+/* Segs are allocated in bulk from SegGroups; groups are chained via
+   'admin' (newest first) and are never deallocated. */
+#define N_SEGS_PER_GROUP 10000
+
+typedef
+   struct _SegGroup {
+      struct _SegGroup* admin;
+      UWord nextfree; /* 0 .. N_SEGS_PER_GROUP */
+      Seg segs[N_SEGS_PER_GROUP];
+   }
+   SegGroup;
+
+static SegGroup* group_list = NULL;
+/* Freed Segs form a FIFO queue from freesegs_oldest to
+   freesegs_youngest; the oldest is the first recycled. */
+static UWord     nFreeSegs = 0;
+static Seg*      freesegs_youngest = NULL;
+static Seg*      freesegs_oldest = NULL;
+
+
+/* Allocate a fresh, zero-initialised SegGroup. */
+static SegGroup* new_SegGroup ( void ) {
+   SegGroup* grp = VG_(malloc)("pc.h_main.nTG.1", sizeof(SegGroup));
+   VG_(memset)(grp, 0, sizeof(*grp));
+   return grp;
+}
+
+/* Get a completely new Seg */
+static Seg* new_Seg ( void )
+{
+   Seg*      teg;
+   SegGroup* g;
+   /* First ever allocation: start the group list. */
+   if (group_list == NULL) {
+      g = new_SegGroup();
+      g->admin = NULL;
+      group_list = g;
+   }
+   tl_assert(group_list->nextfree <= N_SEGS_PER_GROUP);
+   /* Head group is full: push a fresh one on the front. */
+   if (group_list->nextfree == N_SEGS_PER_GROUP) {
+      g = new_SegGroup();
+      g->admin = group_list;
+      group_list = g;
+   }
+   tl_assert(group_list->nextfree < N_SEGS_PER_GROUP);
+   /* Hand out the next unused slot in the head group. */
+   teg = &group_list->segs[ group_list->nextfree ];
+   group_list->nextfree++;
+   stats__segs_allocd++;
+   return teg;
+}
+
+/* Get a Seg to describe a new allocation: brand new until
+   N_FREED_SEGS Segs are queued for recycling, thereafter reuse the
+   oldest freed one.  Either way it is returned marked in-use. */
+static Seg* get_Seg_for_malloc ( void )
+{
+   Seg* seg;
+   if (nFreeSegs < N_FREED_SEGS) {
+      seg = new_Seg();
+      seg->nextfree = (Seg*)1; /* mark in-use */
+      return seg;
+   }
+   /* else recycle the oldest Seg in the free list */
+   tl_assert(freesegs_youngest);
+   tl_assert(freesegs_oldest);
+   tl_assert(freesegs_youngest != freesegs_oldest);
+   seg = freesegs_oldest;
+   freesegs_oldest = seg->nextfree; /* dequeue from the old end */
+   nFreeSegs--;
+   seg->nextfree = (Seg*)1; /* mark in-use */
+   stats__segs_recycled++;
+   return seg;
+}
+
+/* Append 'seg' to the young end of the freed-Seg queue, so that
+   get_Seg_for_malloc later recycles it oldest-first. */
+static void set_Seg_freed ( Seg* seg )
+{
+   tl_assert(seg);
+   tl_assert(!Seg__is_freed(seg));
+   if (nFreeSegs == 0) {
+      /* Queue was empty: seg becomes both ends. */
+      tl_assert(freesegs_oldest == NULL);
+      tl_assert(freesegs_youngest == NULL);
+      seg->nextfree = NULL;
+      freesegs_youngest = seg;
+      freesegs_oldest = seg;
+      nFreeSegs++;
+   } else {
+      tl_assert(freesegs_youngest);
+      tl_assert(freesegs_oldest);
+      if (nFreeSegs == 1) {
+         tl_assert(freesegs_youngest == freesegs_oldest);
+      } else {
+         tl_assert(freesegs_youngest != freesegs_oldest);
+      }
+      tl_assert(freesegs_youngest->nextfree == NULL);
+      tl_assert(seg != freesegs_youngest && seg != freesegs_oldest);
+      /* Link behind the current youngest. */
+      seg->nextfree = NULL;
+      freesegs_youngest->nextfree = seg;
+      freesegs_youngest = seg;
+      nFreeSegs++;
+   }
+}
+
+/* Maps the start address of each currently-allocated client block to
+   its Seg, for free/realloc lookups.  Created lazily on first use. */
+static WordFM* addr_to_seg_map = NULL;
+
+static void addr_to_seg_map_ENSURE_INIT ( void )
+{
+   if (UNLIKELY(addr_to_seg_map == NULL)) {
+      addr_to_seg_map = VG_(newFM)( VG_(malloc), "pc.h_main.attmEI.1",
+                                    VG_(free), NULL );
+   }
+}
+
+/* Look up the Seg whose block starts exactly at 'ga'; NULL if none. */
+static Seg* find_Seg_by_addr ( Addr ga )
+{
+   UWord keyW, valW;
+   addr_to_seg_map_ENSURE_INIT();
+   if (!VG_(lookupFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga ))
+      return NULL;
+   tl_assert(keyW == ga);
+   return (Seg*)valW;
+}
+
+/* Record that the block starting at 'ga' is described by 'seg'.
+   'ga' must not already have a binding. */
+static void bind_addr_to_Seg ( Addr ga, Seg* seg )
+{
+   Bool already;
+   addr_to_seg_map_ENSURE_INIT();
+   already = VG_(addToFM)( addr_to_seg_map, (UWord)ga, (UWord)seg );
+   tl_assert(!already); /* else ga is already bound */
+}
+
+/* Remove the binding for 'ga'; it must currently be bound (i.e. each
+   block is unbound exactly once, at free time). */
+static void unbind_addr_from_Seg ( Addr ga )
+{
+   Bool b;
+   UWord keyW, valW;
+   addr_to_seg_map_ENSURE_INIT();
+   b = VG_(delFromFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga );
+   tl_assert(b); /* else ga was not already bound */
+   tl_assert(keyW == ga);
+   tl_assert(valW != 0);
+}
+
+
+//////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////
+
+// So that post_reg_write_clientcall knows the segment just allocated.
+static Seg* last_seg_added = NULL;
+
+// Returns the added heap segment.  Records the allocation context,
+// makes the block findable by find_Seg_by_addr, and notes it in
+// last_seg_added.
+static Seg* add_new_segment ( ThreadId tid, Addr p, SizeT size )
+{
+   Seg* seg = get_Seg_for_malloc();
+   tl_assert(seg != (Seg*)1); /* since we're using 1 as a special value */
+   seg->addr = p;
+   seg->szB  = size;
+   seg->ec   = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
+   tl_assert(!Seg__is_freed(seg));
+
+   bind_addr_to_Seg(p, seg);
+
+   last_seg_added = seg;
+
+   return seg;
+}
+
+// Forward declarations
+static void copy_mem( Addr from, Addr to, SizeT len );
+static void set_mem_unknown ( Addr a, SizeT len );
+
+static inline VG_REGPARM(1) Seg* nonptr_or_unknown(UWord x); /*fwds*/
+
+/* Common allocator behind the malloc/new/calloc/memalign replacements:
+   allocate from the client heap, optionally zero, mark the contents
+   UNKNOWN and create a Seg for the block.  Returns NULL on failure or
+   on an implausibly huge (effectively negative) size. */
+static
+void* alloc_and_new_mem_heap ( ThreadId tid,
+                               SizeT size, SizeT alignment, Bool is_zeroed )
+{
+   Addr p;
+
+   if ( ((SSizeT)size) < 0) return NULL;
+
+   p = (Addr)VG_(cli_malloc)(alignment, size);
+   /* BUGFIX: VG_(cli_malloc) can fail; previously a failure fell
+      through to VG_(memset)/add_new_segment with p == 0. */
+   if (p == 0)
+      return NULL;
+   if (is_zeroed) VG_(memset)((void*)p, 0, size);
+
+   /* New contents are of unknown pointerness ... */
+   set_mem_unknown( p, size );
+   /* ... but the block itself is now a tracked segment. */
+   add_new_segment( tid, p, size );
+
+   stats__client_mallocs++;
+   return (void*)p;
+}
+
+/* Free 'seg': mark its range UNKNOWN, release the client block,
+   record the free context in seg->ec (replacing the alloc context),
+   queue the Seg for recycling and drop its address binding. */
+static void die_and_free_mem_heap ( ThreadId tid, Seg* seg )
+{
+   // Empty and free the actual block
+   tl_assert(!Seg__is_freed(seg));
+   set_mem_unknown( seg->addr, seg->szB );
+
+   VG_(cli_free)( (void*)seg->addr );
+
+   // Remember where freed
+   seg->ec = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
+
+   set_Seg_freed(seg);
+   unbind_addr_from_Seg( seg->addr );
+
+   stats__client_frees++;
+}
+
+/* free()-family handler: look the block up by its start address and
+   free it; silently ignore frees of blocks we never saw malloc'd
+   (Memcheck reports those). */
+static void handle_free_heap( ThreadId tid, void* p )
+{
+   Seg* seg = find_Seg_by_addr( (Addr)p );
+   if (seg)
+      die_and_free_mem_heap( tid, seg );
+   /* else: freeing a block that wasn't malloc'd.  Ignore. */
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Shadow memory                                        ---*/
+/*------------------------------------------------------------*/
+
+/* Shadow memory holds one Seg for each naturally aligned (guest)
+   word.  For a 32 bit target (assuming host word size == guest word
+   size) that means one Seg per 4 bytes, and each Seg occupies 4
+   bytes.  For a 64 bit target that means one Seg per 8 bytes, and
+   each Seg occupies 8 bytes.  Hence in each case the overall space
+   overhead for shadow memory is 1:1.
+
+   This does however make it a bit tricky to size SecMap.vseg[], since
+   it needs to hold 16384 entries for 32 bit targets but only 8192
+   entries for 64 bit targets. */
+
+#if 0
+__attribute__((unused))
+static void pp_curr_ExeContext(void)
+{
+   VG_(pp_ExeContext)(
+      VG_(get_ExeContext)(
+         VG_(get_current_or_recent_tid)() ) );
+   VG_(message)(Vg_UserMsg, "");
+}
+#endif
+
+#if defined(VGA_x86) || defined(VGA_ppc32)
+#  define SHMEM_SECMAP_MASK         0xFFFC
+#  define SHMEM_SECMAP_SHIFT        2
+#  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_4_ALIGNED(_a)
+#  define SEC_MAP_WORDS             (0x10000UL / 4UL) /* 16k */
+#elif defined(VGA_amd64) || defined(VGA_ppc64)
+#  define SHMEM_SECMAP_MASK         0xFFF8
+#  define SHMEM_SECMAP_SHIFT        3
+#  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_8_ALIGNED(_a)
+#  define SEC_MAP_WORDS             (0x10000UL / 8UL) /* 8k */
+#else
+#  error "Unknown arch"
+#endif
+
+typedef
+   struct {
+      Seg* vseg[SEC_MAP_WORDS];
+   }
+   SecMap;
+
+static SecMap  distinguished_secondary_map;
+
+/* An entry in the primary map.  base must be a 64k-aligned value, and
+   sm points at the relevant secondary map.  The secondary may be
+   either a real secondary, or the distinguished secondary.  DO NOT
+   CHANGE THIS LAYOUT: the first word has to be the key for OSet fast
+   lookups.
+*/
+typedef
+   struct {
+      Addr    base;
+      SecMap* sm;
+   }
+   PriMapEnt;
+
+/* Primary map is an OSet of PriMapEnt (primap_L2), "fronted" by a
+   cache (primap_L1). */
+
+/* Tunable parameter: How big is the L1 queue? */
+#define N_PRIMAP_L1 24
+
+/* Tunable parameter: How far along the L1 queue to insert
+   entries resulting from L2 lookups? */
+#define PRIMAP_L1_INSERT_IX 12
+
+static struct {
+          Addr       base; // must be 64k aligned
+          PriMapEnt* ent; // pointer to the matching primap_L2 node
+       }
+       primap_L1[N_PRIMAP_L1];
+
+static OSet* primap_L2 = NULL;
+
+
+/* # searches initiated in auxmap_L1, and # base cmps required */
+static ULong n_primap_L1_searches  = 0;
+static ULong n_primap_L1_cmps      = 0;
+/* # of searches that missed in auxmap_L1 and therefore had to
+   be handed to auxmap_L2. And the number of nodes inserted. */
+static ULong n_primap_L2_searches  = 0;
+static ULong n_primap_L2_nodes     = 0;
+
+
+/* One-time setup: point every word of the distinguished secondary map
+   at NONPTR, empty the primap L1 cache, and create the L2 OSet. */
+static void init_shadow_memory ( void )
+{
+   Int i;
+
+   for (i = 0; i < SEC_MAP_WORDS; i++)
+      distinguished_secondary_map.vseg[i] = NONPTR;
+
+   for (i = 0; i < N_PRIMAP_L1; i++) {
+      primap_L1[i].base = 1; /* not 64k aligned, so doesn't match any
+                                request ==> slot is empty */
+      primap_L1[i].ent  = NULL;
+   }
+
+   /* OSet fast lookup requires 'base' to be the first field. */
+   tl_assert(0 == offsetof(PriMapEnt,base));
+   tl_assert(sizeof(Addr) == sizeof(void*));
+   primap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(PriMapEnt,base),
+                                    /*fastCmp*/ NULL,
+                                    VG_(malloc), "pc.h_main.ism.1",
+                                    VG_(free) );
+   tl_assert(primap_L2);
+}
+
+/* Insert 'ent' at position 'rank' in the L1 cache; entries at
+   rank..N-2 slide down one slot and the last one falls off the end. */
+static void insert_into_primap_L1_at ( Word rank, PriMapEnt* ent )
+{
+   Word i;
+   tl_assert(ent);
+   tl_assert(rank >= 0 && rank < N_PRIMAP_L1);
+   for (i = N_PRIMAP_L1-1; i > rank; i--)
+      primap_L1[i] = primap_L1[i-1];
+   primap_L1[rank].base = ent->base;
+   primap_L1[rank].ent  = ent;
+}
+
+/* Find the PriMapEnt whose 64k chunk contains 'a', or NULL if there
+   is none.  The L1 cache self-organises: hits at ranks 0/1 are
+   special-cased, any other L1 hit is promoted one place towards the
+   front, and an L2 hit is re-inserted into L1 at
+   PRIMAP_L1_INSERT_IX. */
+static inline PriMapEnt* maybe_find_in_primap ( Addr a )
+{
+   PriMapEnt  key;
+   PriMapEnt* res;
+   Word       i;
+
+   a &= ~(Addr)0xFFFF;
+
+   /* First search the front-cache, which is a self-organising
+      list containing the most popular entries. */
+
+   if (LIKELY(primap_L1[0].base == a))
+      return primap_L1[0].ent;
+   if (LIKELY(primap_L1[1].base == a)) {
+      /* Hit at rank 1: swap ranks 0 and 1. */
+      Addr       t_base = primap_L1[0].base;
+      PriMapEnt* t_ent  = primap_L1[0].ent;
+      primap_L1[0].base = primap_L1[1].base;
+      primap_L1[0].ent  = primap_L1[1].ent;
+      primap_L1[1].base = t_base;
+      primap_L1[1].ent  = t_ent;
+      return primap_L1[0].ent;
+   }
+
+   n_primap_L1_searches++;
+
+   for (i = 0; i < N_PRIMAP_L1; i++) {
+      if (primap_L1[i].base == a) {
+         break;
+      }
+   }
+   tl_assert(i >= 0 && i <= N_PRIMAP_L1);
+
+   n_primap_L1_cmps += (ULong)(i+1);
+
+   if (i < N_PRIMAP_L1) {
+      /* L1 hit at rank i >= 2: promote the entry one place. */
+      if (i > 0) {
+         Addr       t_base = primap_L1[i-1].base;
+         PriMapEnt* t_ent  = primap_L1[i-1].ent;
+         primap_L1[i-1].base = primap_L1[i-0].base;
+         primap_L1[i-1].ent  = primap_L1[i-0].ent;
+         primap_L1[i-0].base = t_base;
+         primap_L1[i-0].ent  = t_ent;
+         i--;
+      }
+      return primap_L1[i].ent;
+   }
+
+   n_primap_L2_searches++;
+
+   /* First see if we already have it. */
+   key.base = a;
+   key.sm   = 0;
+
+   res = VG_(OSetGen_Lookup)(primap_L2, &key);
+   if (res)
+      insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, res );
+   return res;
+}
+
+/* Allocate a new SecMap from the shadow address space and initialise
+   every word's vseg to NONPTR.  Aborts on out-of-memory. */
+static SecMap* alloc_secondary_map ( void )
+{
+   SecMap* map;
+   UInt  i;
+
+   // JRS 2008-June-25: what's the following assertion for?
+   // NOTE(review): presumably because VG_(am_shadow_alloc) hands out
+   // whole pages, so SecMap must be page-sized -- TODO confirm.
+   tl_assert(0 == (sizeof(SecMap) % VKI_MAX_PAGE_SIZE));
+
+   map = VG_(am_shadow_alloc)( sizeof(SecMap) );
+   if (map == NULL)
+      VG_(out_of_memory_NORETURN)( "annelid:allocate new SecMap",
+                                   sizeof(SecMap) );
+
+   for (i = 0; i < SEC_MAP_WORDS; i++)
+      map->vseg[i] = NONPTR;
+   if (0) VG_(printf)("XXX new secmap %p\n", map);
+   return map;
+}
+
+/* Like maybe_find_in_primap, but on a miss allocates a fresh
+   PriMapEnt + SecMap pair for the 64k chunk containing 'a' and enters
+   it into both primap_L2 and the L1 cache.  Never returns NULL. */
+static PriMapEnt* find_or_alloc_in_primap ( Addr a )
+{
+   PriMapEnt *nyu, *res;
+
+   /* First see if we already have it. */
+   res = maybe_find_in_primap( a );
+   if (LIKELY(res))
+      return res;
+
+   /* Ok, there's no entry in the secondary map, so we'll have
+      to allocate one. */
+   a &= ~(Addr)0xFFFF;
+
+   nyu = (PriMapEnt*) VG_(OSetGen_AllocNode)( 
+                         primap_L2, sizeof(PriMapEnt) );
+   tl_assert(nyu);
+   nyu->base = a;
+   nyu->sm   = alloc_secondary_map();
+   tl_assert(nyu->sm);
+   VG_(OSetGen_Insert)( primap_L2, nyu );
+   insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, nyu );
+   n_primap_L2_nodes++;
+   return nyu;
+}
+
+/////////////////////////////////////////////////
+
+// Read the shadow (vseg) of the word at 'a'.
+// Nb: 'a' must be naturally word aligned for the host.
+static inline Seg* get_mem_vseg ( Addr a )
+{
+   SecMap* sm     = find_or_alloc_in_primap(a)->sm;
+   UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
+   tl_assert(SHMEM_IS_WORD_ALIGNED(a));
+   return sm->vseg[sm_off];
+}
+
+// Write the shadow (vseg) of the word at 'a'.
+// Nb: 'a' must be naturally word aligned for the host.
+static inline void set_mem_vseg ( Addr a, Seg* vseg )
+{
+   SecMap* sm     = find_or_alloc_in_primap(a)->sm;
+   UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
+   tl_assert(SHMEM_IS_WORD_ALIGNED(a));
+   sm->vseg[sm_off] = vseg;
+}
+
+// Returns UNKNOWN if no matches.  Never returns BOTTOM or NONPTR.
+// Also, only returns in-use segments, not freed ones.
+/* Linear scan of every Seg ever allocated for one whose live range
+   contains 'a'.  Skips freed segments; returns UNKNOWN if nothing
+   matches (never BOTTOM or NONPTR). */
+static Seg* get_Seg_containing_addr_SLOW( Addr a )
+{
+   SegGroup* grp;
+   UWord     ix;
+   stats__slow_searches++;
+   for (grp = group_list; grp != NULL; grp = grp->admin) {
+      for (ix = 0; ix < grp->nextfree; ix++) {
+         Seg* s = &grp->segs[ix];
+         stats__slow_totcmps++;
+         if (Seg__is_freed(s))
+            continue;
+         if (s->addr <= a && a < s->addr + s->szB)
+            return s;
+      }
+   }
+   return UNKNOWN;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- malloc() et al replacements                          ---*/
+/*------------------------------------------------------------*/
+
+/* malloc/new/new[]/memalign replacements: all funnel into
+   alloc_and_new_mem_heap, differing only in alignment and zeroing. */
+void* h_replace_malloc ( ThreadId tid, SizeT n )
+{
+   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
+                                        /*is_zeroed*/False );
+}
+
+void* h_replace___builtin_new ( ThreadId tid, SizeT n )
+{
+   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
+                                           /*is_zeroed*/False );
+}
+
+void* h_replace___builtin_vec_new ( ThreadId tid, SizeT n )
+{
+   return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
+                                           /*is_zeroed*/False );
+}
+
+void* h_replace_memalign ( ThreadId tid, SizeT align, SizeT n )
+{
+   return alloc_and_new_mem_heap ( tid, n, align,
+                                        /*is_zeroed*/False );
+}
+
+/* calloc replacement: zeroed allocation of nmemb elements of size1
+   bytes each. */
+void* h_replace_calloc ( ThreadId tid, SizeT nmemb, SizeT size1 )
+{
+   /* BUGFIX: refuse requests where nmemb*size1 overflows SizeT, which
+      would quietly allocate a too-small block. */
+   if (nmemb != 0 && (nmemb * size1) / nmemb != size1)
+      return NULL;
+   return alloc_and_new_mem_heap ( tid, nmemb*size1, VG_(clo_alignment),
+                                        /*is_zeroed*/True );
+}
+
+/* free() replacement; all lookup/ignore behaviour lives in
+   handle_free_heap. */
+void h_replace_free ( ThreadId tid, void* p )
+{
+   // Should arguably check here if p.vseg matches the segID of the
+   // pointed-to block... unfortunately, by this stage, we don't know what
+   // p.vseg is, because we don't know the address of p (the p here is a
+   // copy, and we've lost the address of its source).  To do so would
+   // require passing &p in, which would require rewriting part of
+   // vg_replace_malloc.c... argh.
+   //
+   // However, Memcheck does free checking, and will catch almost all
+   // violations this checking would have caught.  (Would only miss if we
+   // unluckily passed an unrelated pointer to the very start of a heap
+   // block that was unrelated to that block.  This is very unlikely!)    So
+   // we haven't lost much.
+
+   handle_free_heap(tid, p);
+}
+
+/* operator delete / delete[] replacements: same treatment as free. */
+void h_replace___builtin_delete ( ThreadId tid, void* p )
+{
+   handle_free_heap(tid, p);
+}
+
+void h_replace___builtin_vec_delete ( ThreadId tid, void* p )
+{
+   handle_free_heap(tid, p);
+}
+
+/* realloc replacement.  Allocates a new block, copies over as much of
+   the old contents (and its shadow state) as fits, marks any extension
+   UNKNOWN, and frees the old block.  Returns NULL if p_old is not a
+   known block, or on allocation failure (old block untouched). */
+void* h_replace_realloc ( ThreadId tid, void* p_old, SizeT new_size )
+{
+   Seg*  seg;
+   Addr  p_new;
+   SizeT copy_szB;
+
+   /* First try and find the block. */
+   seg = find_Seg_by_addr( (Addr)p_old );
+   if (!seg)
+      return NULL;
+
+   tl_assert(seg->addr == (Addr)p_old);
+
+   /* Allocate the new block and copy the retained part.  The two
+      shrink/grow branches of the old code are unified here: only the
+      copy length and the UNKNOWN-marking of the extension differ. */
+   copy_szB = new_size < seg->szB ? new_size : seg->szB;
+   p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size);
+   if (p_new == 0)
+      return NULL; /* BUGFIX: VG_(cli_malloc) can fail */
+   VG_(memcpy)((void*)p_new, p_old, copy_szB);
+
+   /* Notification: retained part keeps its shadow state; any
+      extension is of unknown pointerness. */
+   copy_mem       ( (Addr)p_old, p_new, copy_szB );
+   if (new_size > seg->szB)
+      set_mem_unknown( p_new + seg->szB, new_size - seg->szB );
+
+   /* Free old memory */
+   die_and_free_mem_heap( tid, seg );
+
+   /* This has to be after die_and_free_mem_heap, otherwise the
+      former succeeds in shorting out the new block, not the old,
+      in the case when both are on the same list.  NB jrs
+      2008-Sept-11: not sure if this comment is valid/correct any
+      more -- I suspect not. */
+   add_new_segment ( tid, p_new, new_size );
+
+   return (void*)p_new;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Memory events                                        ---*/
+/*------------------------------------------------------------*/
+
+/* Set the vseg of the word-aligned words overlapping [a, a+len) to
+   'seg'.  NOTE(review): 'end' is computed from the already
+   rounded-down 'a', so a misaligned range's final partial word may be
+   left untouched -- seemingly consistent with the stated policy of not
+   tracking misaligned pointers, but worth confirming. */
+static inline
+void set_mem ( Addr a, SizeT len, Seg* seg )
+{
+   Addr end;
+
+   if (0 == len)
+      return;
+
+   if (len > 100 * 1000 * 1000)
+      VG_(message)(Vg_UserMsg,
+                   "Warning: set address range state: large range %lu", len);
+
+   a   = VG_ROUNDDN(a,       sizeof(UWord));
+   end = VG_ROUNDUP(a + len, sizeof(UWord));
+   for ( ; a < end; a += sizeof(UWord))
+      set_mem_vseg(a, seg);
+}
+
+/* Mark [a, a+len) as holding values of unknown pointerness. */
+static void set_mem_unknown( Addr a, SizeT len )
+{
+   set_mem( a, len, UNKNOWN );
+}
+
+//zz static void set_mem_nonptr( Addr a, UInt len )
+//zz {
+//zz    set_mem( a, len, NONPTR );
+//zz }
+
+/* Client mappings present at startup: contents could be anything, so
+   mark them UNKNOWN.  No Seg is created for them (note the
+   commented-out add_new_segment call). */
+void h_new_mem_startup( Addr a, SizeT len,
+                        Bool rr, Bool ww, Bool xx, ULong di_handle )
+{
+   if (0) VG_(printf)("new_mem_startup(%#lx,%lu)\n", a, len);
+   set_mem_unknown( a, len );
+   //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
+}
+
+//zz // XXX: Currently not doing anything with brk() -- new segments, or not?
+//zz // Proper way to do it would be to grow/shrink a single, special brk segment.
+//zz //
+//zz // brk is difficult: it defines a single segment, of changeable size.
+//zz // It starts off with size zero, at the address given by brk(0).  There are
+//zz // no pointers within the program to it.  Any subsequent calls by the
+//zz // program to brk() (possibly growing or shrinking it) return pointers to
+//zz // the *end* of the segment (nb: this is the kernel brk(), which is
+//zz // different to the libc brk()).
+//zz //
+//zz // If fixing this, don't forget to update the brk case in SK_(post_syscall).
+//zz //
//zz // Nb: not sure if the return value is the last byte addressable, or one
+//zz // past the end of the segment.
+//zz //
+//zz static void new_mem_brk( Addr a, UInt len )
+//zz {
+//zz    set_mem_unknown(a, len);
+//zz    //VG_(skin_panic)("can't handle new_mem_brk");
+//zz }
+
+// Not quite right:  if you mmap a segment into a specified place, it could
+// be legitimate to do certain arithmetic with the pointer that it wouldn't
+// otherwise.  Hopefully this is rare, though.
void h_new_mem_mmap( Addr a, SizeT len,
                     Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (0) VG_(printf)("new_mem_mmap(%#lx,%lu)\n", a, len);
//zz #if 0
//zz    Seg seg = NULL;
//zz 
//zz    // Check for overlapping segments
//zz #if 0
//zz    is_overlapping_seg___a   = a;    // 'free' variable
//zz    is_overlapping_seg___len = len;  // 'free' variable
//zz    seg = (Seg)VG_(HT_first_match) ( mlist, is_overlapping_seg );
//zz    is_overlapping_seg___a   = 0;    // paranoia, reset
//zz    is_overlapping_seg___len = 0;    // paranoia, reset
//zz #endif
//zz 
//zz    // XXX: do this check properly with ISLists
//zz 
//zz    if ( ISList__findI( seglist, a, &seg )) {
//zz       sk_assert(SegMmap == seg->status || SegMmapFree == seg->status);
//zz       if (SegMmap == seg->status)
//zz    
//zz    }
//zz 
//zz    if (NULL != seg) {
//zz       // Right, we found an overlap
//zz       if (VG_(clo_verbosity) > 1)
//zz          VG_(message)(Vg_UserMsg, "mmap overlap:  old: %#lx, %d;  new: %#lx, %d",
//zz                                   seg->left, Seg__size(seg), a, len);
//zz       if (seg->left <= a && a <= seg->right) {
//zz          // New one truncates end of the old one.  Nb: we don't adjust its
//zz          // size, because the first segment's pointer can be (and for
//zz          // Konqueror, is) legitimately used to access parts of the second
//zz          // segment.  At least, I assume Konqueror is doing something legal.
//zz          // so that a size mismatch upon munmap isn't a problem.
//zz //         seg->size = a - seg->data;
//zz //         seg->is_truncated_map = True;
//zz //         if (VG_(clo_verbosity) > 1)
//zz //            VG_(message)(Vg_UserMsg, "old seg truncated to length %d",
//zz //                                     seg->size);
//zz       } else {
//zz          VG_(skin_panic)("Can't handle this mmap() overlap case");
//zz       }
//zz    }
   /* Live behaviour: simply mark the whole mapped range as UNKNOWN.
      The //zz block above is disabled historical overlap-handling
      code, kept for reference. */
   set_mem_unknown( a, len );
   //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
//zz #endif
}
+
+static void copy_mem( Addr from, Addr to, SizeT len )
+{
+   Addr fromend = from + len;
+
+   // Must be aligned due to malloc always returning aligned objects.
+   tl_assert(VG_IS_8_ALIGNED(from) && VG_IS_8_ALIGNED(to));
+
+   // Must only be called with positive len.
+   if (0 == len)
+      return;
+
+   for ( ; from < fromend; from += sizeof(UWord), to += sizeof(UWord))
+      set_mem_vseg( to, get_mem_vseg(from) );
+}
+
+//zz // Similar to SK_(realloc)()
+//zz static void copy_mem_remap( Addr from, Addr to, UInt len )
+//zz {
+//zz    VG_(skin_panic)("argh: copy_mem_remap");
+//zz }
+//zz 
+//zz static void die_mem_brk( Addr a, UInt len )
+//zz {
+//zz    set_mem_unknown(a, len);
+//zz //   VG_(skin_panic)("can't handle die_mem_brk()");
+//zz }
+
void h_die_mem_munmap( Addr a, SizeT len )
{
   /* Currently a no-op: unmapped ranges keep their existing shadow
      state.  The disabled call below is the historical behaviour. */
//   handle_free_munmap( (void*)a, len );
}
+
+// Don't need to check all addresses within the block; in the absence of
+// discontiguous segments, the segments for the first and last bytes should
+// be the same.  Can't easily check the pointer segment matches the block
+// segment, unfortunately, but the first/last check should catch most
+// errors.
+static void pre_mem_access2 ( CorePart part, ThreadId tid, Char* str,
+                              Addr s/*tart*/, Addr e/*nd*/ )
+{
+   Seg  *seglo, *seghi;
+   Bool s_in_seglo, s_in_seghi, e_in_seglo, e_in_seghi;
+
+   // Don't check code being translated -- very slow, and not much point
+   if (Vg_CoreTranslate == part) return;
+
+   // Don't check the signal case -- only happens in core, no need to check
+   if (Vg_CoreSignal == part) return;
+
+   // Only expect syscalls after this point
+   if (part != Vg_CoreSysCall) {
+      VG_(printf)("part = %d\n", part);
+      VG_(tool_panic)("unknown corepart in pre_mem_access2");
+   }
+
+   // Check first and last bytes match
+   seglo = get_Seg_containing_addr_SLOW( s );
+   seghi = get_Seg_containing_addr_SLOW( e );
+   tl_assert( BOTTOM != seglo && NONPTR != seglo );
+   tl_assert( BOTTOM != seghi && NONPTR != seghi );
+
+   /* so seglo and seghi are either UNKNOWN or P(..) */
+   s_in_seglo
+      = is_known_segment(seglo)
+        && seglo->addr <= s && s < seglo->addr + seglo->szB;
+   s_in_seghi
+      = is_known_segment(seghi)
+        && seghi->addr <= s && s < seghi->addr + seghi->szB;
+   e_in_seglo
+      = is_known_segment(seglo)
+        && seglo->addr <= e && e < seglo->addr + seglo->szB;
+   e_in_seghi
+      = is_known_segment(seghi)
+        && seghi->addr <= e && e < seghi->addr + seghi->szB;
+
+   /* record an error if start and end are in different, but known
+      segments */
+   if (is_known_segment(seglo) && is_known_segment(seghi)
+       && seglo != seghi) {
+      h_record_sysparam_error(tid, part, str, s, e, seglo, seghi);
+   }
+   else
+   /* record an error if start is in a known segment but end isn't */
+   if (is_known_segment(seglo) && !is_known_segment(seghi)) {
+      h_record_sysparam_error(tid, part, str, s, e, seglo, UNKNOWN);
+   }
+   else
+   /* record an error if end is in a known segment but start isn't */
+   if (!is_known_segment(seglo) && is_known_segment(seghi)) {
+      h_record_sysparam_error(tid, part, str, s, e, UNKNOWN, seghi);
+   }
+}
+
+void h_pre_mem_access ( CorePart part, ThreadId tid, Char* s,
+                        Addr base, SizeT size )
+{
+   pre_mem_access2( part, tid, s, base, base + size - 1 );
+}
+
+void h_pre_mem_read_asciiz ( CorePart part, ThreadId tid, 
+                             Char* s, Addr lo )
+{
+   Addr hi = lo;
+
+   // Nb: the '\0' must be included in the lo...hi range
+   while ('\0' != *(Char*)hi) hi++;
+   pre_mem_access2( part, tid, s, lo, hi );
+}
+
+//zz static void post_mem_write(Addr a, UInt len)
+//zz {
+//zz    set_mem_unknown(a, len);
+//zz }
+
+
+/*------------------------------------------------------------*/
+/*--- Register event handlers                              ---*/
+/*------------------------------------------------------------*/
+
+//zz static void post_regs_write_init ( void )
+//zz {
+//zz    UInt i;
+//zz    for (i = R_EAX; i <= R_EDI; i++)
+//zz       VG_(set_shadow_archreg)( i, (UInt)UNKNOWN );
+//zz 
+//zz    // Don't bother about eflags
+//zz }
+
// BEGIN move this ugliness to pc_machine.c
+
+static inline Bool host_is_big_endian ( void ) {
+   UInt x = 0x11223344;
+   return 0x1122 == *(UShort*)(&x);
+}
+static inline Bool host_is_little_endian ( void ) {
+   UInt x = 0x11223344;
+   return 0x3344 == *(UShort*)(&x);
+}
+
/* Maximum number of integer-register base offsets one query can report. */
#define N_INTREGINFO_OFFSETS 4

/* Holds the result of a query to 'get_IntRegInfo'.  Valid values for
   n_offsets are:

   -1: means the queried guest state slice exactly matches
       one integer register

   0: means the queried guest state slice does not overlap any
      integer registers

   1 .. N_INTREGINFO_OFFSETS: means the queried guest state offset
      overlaps n_offsets different integer registers, and their base
      offsets are placed in the offsets array.
*/
typedef
   struct {
      Int offsets[N_INTREGINFO_OFFSETS];  /* base offsets of overlapped regs */
      Int n_offsets;  /* -1, 0, or 1..N_INTREGINFO_OFFSETS; see above */
   }
   IntRegInfo;
+
+
+#if defined(VGA_x86)
+# include "libvex_guest_x86.h"
+# define MC_SIZEOF_GUEST_STATE sizeof(VexGuestX86State)
+#endif
+
+#if defined(VGA_amd64)
+# include "libvex_guest_amd64.h"
+# define MC_SIZEOF_GUEST_STATE sizeof(VexGuestAMD64State)
+# define PC_OFF_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO)
+# define PC_SZB_FS_ZERO sizeof( ((VexGuestAMD64State*)0)->guest_FS_ZERO)
+#endif
+
+#if defined(VGA_ppc32)
+# include "libvex_guest_ppc32.h"
+# define MC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC32State)
+#endif
+
+#if defined(VGA_ppc64)
+# include "libvex_guest_ppc64.h"
+# define MC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC64State)
+#endif
+
+
+/* See description on definition of type IntRegInfo. */
/* Classify the guest-state slice [offset, offset+szB) for the current
   guest architecture (selected by VGA_* at compile time): sets
   iii->n_offsets to -1 (slice is exactly one integer register), 0 (no
   integer-register overlap), or records the base offset(s) of partial
   overlaps in iii->offsets.  Any (offset, size) combination not in the
   per-arch tables below is fatal (hits the tl_assert(0)). */
static void get_IntRegInfo ( /*OUT*/IntRegInfo* iii, Int offset, Int szB )
{
   /* --------------------- x86 --------------------- */

#  if defined(VGA_x86)

#  define GOF(_fieldname) \
      (offsetof(VexGuestX86State,guest_##_fieldname))

   Int  o    = offset;
   Int  sz   = szB;
   Bool is4  = sz == 4;
   Bool is21 = sz == 2 || sz == 1;

   tl_assert(sz > 0);
   tl_assert(host_is_little_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(EAX)     && is4) goto exactly1;
   if (o == GOF(ECX)     && is4) goto exactly1;
   if (o == GOF(EDX)     && is4) goto exactly1;
   if (o == GOF(EBX)     && is4) goto exactly1;
   if (o == GOF(ESP)     && is4) goto exactly1;
   if (o == GOF(EBP)     && is4) goto exactly1;
   if (o == GOF(ESI)     && is4) goto exactly1;
   if (o == GOF(EDI)     && is4) goto exactly1;
   if (o == GOF(EIP)     && is4) goto none;
   if (o == GOF(CC_OP)   && is4) goto none;
   if (o == GOF(CC_DEP1) && is4) goto none;
   if (o == GOF(CC_DEP2) && is4) goto none;
   if (o == GOF(CC_NDEP) && is4) goto none;
   if (o == GOF(DFLAG)   && is4) goto none;
   if (o == GOF(IDFLAG)  && is4) goto none;
   if (o == GOF(ACFLAG)  && is4) goto none;

   /* Partial accesses to integer registers */
   if (o == GOF(EAX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EAX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(ECX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(ECX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(EBX)     && is21) {         o -= 0; goto contains_o; }
   // bl case
   /* NOTE(review): several partial cases are absent here (e.g. EBX+1,
      ESP, EBP); such accesses fall through to the assertion below. */
   if (o == GOF(EDX)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EDX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(ESI)     && is21) {         o -= 0; goto contains_o; }
   if (o == GOF(EDI)     && is21) {         o -= 0; goto contains_o; }

   /* Segment related guff */
   if (o == GOF(GS)  && sz == 2) goto none;
   if (o == GOF(LDT) && is4) goto none;
   if (o == GOF(GDT) && is4) goto none;

   /* FP admin related */
   if (o == GOF(SSEROUND) && is4) goto none;
   if (o == GOF(FPROUND)  && is4) goto none;
   if (o == GOF(EMWARN)   && is4) goto none;
   if (o == GOF(FTOP)     && is4) goto none;
   if (o == GOF(FPTAG)    && sz == 8) goto none;
   if (o == GOF(FC3210)   && is4) goto none;

   /* xmm registers, including arbitrary sub-parts */
   if (o >= GOF(XMM0) && o+sz <= GOF(XMM0)+16) goto none;
   if (o >= GOF(XMM1) && o+sz <= GOF(XMM1)+16) goto none;
   if (o >= GOF(XMM2) && o+sz <= GOF(XMM2)+16) goto none;
   if (o >= GOF(XMM3) && o+sz <= GOF(XMM3)+16) goto none;
   if (o >= GOF(XMM4) && o+sz <= GOF(XMM4)+16) goto none;
   if (o >= GOF(XMM5) && o+sz <= GOF(XMM5)+16) goto none;
   if (o >= GOF(XMM6) && o+sz <= GOF(XMM6)+16) goto none;
   if (o >= GOF(XMM7) && o+sz <= GOF(XMM7)+16) goto none;

   /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
      to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;

   /* the entire mmx/x87 register bank in one big piece */
   if (o == GOF(FPREG) && sz == 64) goto none;

   VG_(printf)("get_IntRegInfo(x86):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF

   /* -------------------- amd64 -------------------- */

#  elif defined(VGA_amd64)

#  define GOF(_fieldname) \
      (offsetof(VexGuestAMD64State,guest_##_fieldname))

   Int  o     = offset;
   Int  sz    = szB;
   Bool is421 = sz == 4 || sz == 2 || sz == 1;
   Bool is8   = sz == 8;

   tl_assert(sz > 0);
   tl_assert(host_is_little_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(RAX)     && is8) goto exactly1;
   if (o == GOF(RCX)     && is8) goto exactly1;
   if (o == GOF(RDX)     && is8) goto exactly1;
   if (o == GOF(RBX)     && is8) goto exactly1;
   if (o == GOF(RSP)     && is8) goto exactly1;
   if (o == GOF(RBP)     && is8) goto exactly1;
   if (o == GOF(RSI)     && is8) goto exactly1;
   if (o == GOF(RDI)     && is8) goto exactly1;
   if (o == GOF(R8)      && is8) goto exactly1;
   if (o == GOF(R9)      && is8) goto exactly1;
   if (o == GOF(R10)     && is8) goto exactly1;
   if (o == GOF(R11)     && is8) goto exactly1;
   if (o == GOF(R12)     && is8) goto exactly1;
   if (o == GOF(R13)     && is8) goto exactly1;
   if (o == GOF(R14)     && is8) goto exactly1;
   if (o == GOF(R15)     && is8) goto exactly1;
   if (o == GOF(RIP)     && is8) goto exactly1;
   if (o == GOF(CC_OP)   && is8) goto none;
   if (o == GOF(CC_DEP1) && is8) goto none;
   if (o == GOF(CC_DEP2) && is8) goto none;
   if (o == GOF(CC_NDEP) && is8) goto none;
   if (o == GOF(DFLAG)   && is8) goto none;
   if (o == GOF(IDFLAG)  && is8) goto none;

   /* Partial accesses to integer registers */
   if (o == GOF(RAX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RAX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RCX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RCX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RDX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RDX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RBX)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RBX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   if (o == GOF(RBP)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RSI)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(RDI)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R8)      && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R9)      && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R10)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R11)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R12)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R13)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R14)     && is421) {         o -= 0; goto contains_o; }
   if (o == GOF(R15)     && is421) {         o -= 0; goto contains_o; }

   /* Segment related guff */
   if (o == GOF(FS_ZERO) && is8) goto exactly1;

   /* FP admin related */
   if (o == GOF(SSEROUND) && is8) goto none;
   if (o == GOF(FPROUND)  && is8) goto none;
   if (o == GOF(EMWARN)   && sz == 4) goto none;
   if (o == GOF(FTOP)     && sz == 4) goto none;
   if (o == GOF(FPTAG)    && is8) goto none;
   if (o == GOF(FC3210)   && is8) goto none;

   /* xmm registers, including arbitrary sub-parts */
   if (o >= GOF(XMM0)  && o+sz <= GOF(XMM0)+16)  goto none;
   if (o >= GOF(XMM1)  && o+sz <= GOF(XMM1)+16)  goto none;
   if (o >= GOF(XMM2)  && o+sz <= GOF(XMM2)+16)  goto none;
   if (o >= GOF(XMM3)  && o+sz <= GOF(XMM3)+16)  goto none;
   if (o >= GOF(XMM4)  && o+sz <= GOF(XMM4)+16)  goto none;
   if (o >= GOF(XMM5)  && o+sz <= GOF(XMM5)+16)  goto none;
   if (o >= GOF(XMM6)  && o+sz <= GOF(XMM6)+16)  goto none;
   if (o >= GOF(XMM7)  && o+sz <= GOF(XMM7)+16)  goto none;
   if (o >= GOF(XMM8)  && o+sz <= GOF(XMM8)+16)  goto none;
   if (o >= GOF(XMM9)  && o+sz <= GOF(XMM9)+16)  goto none;
   if (o >= GOF(XMM10) && o+sz <= GOF(XMM10)+16) goto none;
   if (o >= GOF(XMM11) && o+sz <= GOF(XMM11)+16) goto none;
   if (o >= GOF(XMM12) && o+sz <= GOF(XMM12)+16) goto none;
   if (o >= GOF(XMM13) && o+sz <= GOF(XMM13)+16) goto none;
   if (o >= GOF(XMM14) && o+sz <= GOF(XMM14)+16) goto none;
   if (o >= GOF(XMM15) && o+sz <= GOF(XMM15)+16) goto none;

   /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
      to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;

   VG_(printf)("get_IntRegInfo(amd64):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF

   /* -------------------- ppc32 -------------------- */

#  elif defined(VGA_ppc32)

#  define GOF(_fieldname) \
      (offsetof(VexGuestPPC32State,guest_##_fieldname))

   Int  o    = offset;
   Int  sz   = szB;
   Bool is4  = sz == 4;
   Bool is8  = sz == 8;

   tl_assert(sz > 0);
   tl_assert(host_is_big_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(GPR0)  && is4) goto exactly1;
   if (o == GOF(GPR1)  && is4) goto exactly1;
   if (o == GOF(GPR2)  && is4) goto exactly1;
   if (o == GOF(GPR3)  && is4) goto exactly1;
   if (o == GOF(GPR4)  && is4) goto exactly1;
   if (o == GOF(GPR5)  && is4) goto exactly1;
   if (o == GOF(GPR6)  && is4) goto exactly1;
   if (o == GOF(GPR7)  && is4) goto exactly1;
   if (o == GOF(GPR8)  && is4) goto exactly1;
   if (o == GOF(GPR9)  && is4) goto exactly1;
   if (o == GOF(GPR10) && is4) goto exactly1;
   if (o == GOF(GPR11) && is4) goto exactly1;
   if (o == GOF(GPR12) && is4) goto exactly1;
   if (o == GOF(GPR13) && is4) goto exactly1;
   if (o == GOF(GPR14) && is4) goto exactly1;
   if (o == GOF(GPR15) && is4) goto exactly1;
   if (o == GOF(GPR16) && is4) goto exactly1;
   if (o == GOF(GPR17) && is4) goto exactly1;
   if (o == GOF(GPR18) && is4) goto exactly1;
   if (o == GOF(GPR19) && is4) goto exactly1;
   if (o == GOF(GPR20) && is4) goto exactly1;
   if (o == GOF(GPR21) && is4) goto exactly1;
   if (o == GOF(GPR22) && is4) goto exactly1;
   if (o == GOF(GPR23) && is4) goto exactly1;
   if (o == GOF(GPR24) && is4) goto exactly1;
   if (o == GOF(GPR25) && is4) goto exactly1;
   if (o == GOF(GPR26) && is4) goto exactly1;
   if (o == GOF(GPR27) && is4) goto exactly1;
   if (o == GOF(GPR28) && is4) goto exactly1;
   if (o == GOF(GPR29) && is4) goto exactly1;
   if (o == GOF(GPR30) && is4) goto exactly1;
   if (o == GOF(GPR31) && is4) goto exactly1;

   /* Misc integer reg and condition code accesses */
   if (o == GOF(LR)        && is4) goto exactly1;
   if (o == GOF(CTR)       && is4) goto exactly1;
   if (o == GOF(CIA)       && is4) goto none;
   if (o == GOF(CIA_AT_SC) && is4) goto none;
   if (o == GOF(RESVN)     && is4) goto none;
   if (o == GOF(TISTART)   && is4) goto none;
   if (o == GOF(TILEN)     && is4) goto none;
   if (o == GOF(REDIR_SP)  && is4) goto none;

   if (sz == 1) {
      if (o == GOF(XER_SO))  goto none;
      if (o == GOF(XER_OV))  goto none;
      if (o == GOF(XER_CA))  goto none;
      if (o == GOF(XER_BC))  goto none;
      if (o == GOF(CR0_321)) goto none;
      if (o == GOF(CR0_0))   goto none;
      if (o == GOF(CR1_321)) goto none;
      if (o == GOF(CR1_0))   goto none;
      if (o == GOF(CR2_321)) goto none;
      if (o == GOF(CR2_0))   goto none;
      if (o == GOF(CR3_321)) goto none;
      if (o == GOF(CR3_0))   goto none;
      if (o == GOF(CR4_321)) goto none;
      if (o == GOF(CR4_0))   goto none;
      if (o == GOF(CR5_321)) goto none;
      if (o == GOF(CR5_0))   goto none;
      if (o == GOF(CR6_321)) goto none;
      if (o == GOF(CR6_0))   goto none;
      if (o == GOF(CR7_321)) goto none;
      if (o == GOF(CR7_0))   goto none;
   }

   /* Exact accesses to FP registers */
   if (o == GOF(FPR0)  && is8) goto none;
   if (o == GOF(FPR1)  && is8) goto none;
   if (o == GOF(FPR2)  && is8) goto none;
   if (o == GOF(FPR3)  && is8) goto none;
   if (o == GOF(FPR4)  && is8) goto none;
   if (o == GOF(FPR5)  && is8) goto none;
   if (o == GOF(FPR6)  && is8) goto none;
   if (o == GOF(FPR7)  && is8) goto none;
   if (o == GOF(FPR8)  && is8) goto none;
   if (o == GOF(FPR9)  && is8) goto none;
   if (o == GOF(FPR10) && is8) goto none;
   if (o == GOF(FPR11) && is8) goto none;
   if (o == GOF(FPR12) && is8) goto none;
   if (o == GOF(FPR13) && is8) goto none;
   if (o == GOF(FPR14) && is8) goto none;
   if (o == GOF(FPR15) && is8) goto none;
   if (o == GOF(FPR16) && is8) goto none;
   if (o == GOF(FPR17) && is8) goto none;
   if (o == GOF(FPR18) && is8) goto none;
   if (o == GOF(FPR19) && is8) goto none;
   if (o == GOF(FPR20) && is8) goto none;
   if (o == GOF(FPR21) && is8) goto none;
   if (o == GOF(FPR22) && is8) goto none;
   if (o == GOF(FPR23) && is8) goto none;
   if (o == GOF(FPR24) && is8) goto none;
   if (o == GOF(FPR25) && is8) goto none;
   if (o == GOF(FPR26) && is8) goto none;
   if (o == GOF(FPR27) && is8) goto none;
   if (o == GOF(FPR28) && is8) goto none;
   if (o == GOF(FPR29) && is8) goto none;
   if (o == GOF(FPR30) && is8) goto none;
   if (o == GOF(FPR31) && is8) goto none;

   /* FP admin related */
   if (o == GOF(FPROUND) && is4) goto none;
   if (o == GOF(EMWARN)  && is4) goto none;

   /* Altivec registers */
   if (o == GOF(VR0)  && sz == 16) goto none;
   if (o == GOF(VR1)  && sz == 16) goto none;
   if (o == GOF(VR2)  && sz == 16) goto none;
   if (o == GOF(VR3)  && sz == 16) goto none;
   if (o == GOF(VR4)  && sz == 16) goto none;
   if (o == GOF(VR5)  && sz == 16) goto none;
   if (o == GOF(VR6)  && sz == 16) goto none;
   if (o == GOF(VR7)  && sz == 16) goto none;
   if (o == GOF(VR8)  && sz == 16) goto none;
   if (o == GOF(VR9)  && sz == 16) goto none;
   if (o == GOF(VR10) && sz == 16) goto none;
   if (o == GOF(VR11) && sz == 16) goto none;
   if (o == GOF(VR12) && sz == 16) goto none;
   if (o == GOF(VR13) && sz == 16) goto none;
   if (o == GOF(VR14) && sz == 16) goto none;
   if (o == GOF(VR15) && sz == 16) goto none;
   if (o == GOF(VR16) && sz == 16) goto none;
   if (o == GOF(VR17) && sz == 16) goto none;
   if (o == GOF(VR18) && sz == 16) goto none;
   if (o == GOF(VR19) && sz == 16) goto none;
   if (o == GOF(VR20) && sz == 16) goto none;
   if (o == GOF(VR21) && sz == 16) goto none;
   if (o == GOF(VR22) && sz == 16) goto none;
   if (o == GOF(VR23) && sz == 16) goto none;
   if (o == GOF(VR24) && sz == 16) goto none;
   if (o == GOF(VR25) && sz == 16) goto none;
   if (o == GOF(VR26) && sz == 16) goto none;
   if (o == GOF(VR27) && sz == 16) goto none;
   if (o == GOF(VR28) && sz == 16) goto none;
   if (o == GOF(VR29) && sz == 16) goto none;
   if (o == GOF(VR30) && sz == 16) goto none;
   if (o == GOF(VR31) && sz == 16) goto none;

   VG_(printf)("get_IntRegInfo(ppc32):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF

   /* -------------------- ppc64 -------------------- */

#  elif defined(VGA_ppc64)

#  define GOF(_fieldname) \
      (offsetof(VexGuestPPC64State,guest_##_fieldname))

   Int  o    = offset;
   Int  sz   = szB;
   Bool is4  = sz == 4;
   Bool is8  = sz == 8;

   tl_assert(sz > 0);
   tl_assert(host_is_big_endian());

   /* Set default state to "does not intersect any int register". */
   VG_(memset)( iii, 0, sizeof(*iii) );

   /* Exact accesses to integer registers */
   if (o == GOF(GPR0)  && is8) goto exactly1;
   if (o == GOF(GPR1)  && is8) goto exactly1;
   if (o == GOF(GPR2)  && is8) goto exactly1;
   if (o == GOF(GPR3)  && is8) goto exactly1;
   if (o == GOF(GPR4)  && is8) goto exactly1;
   if (o == GOF(GPR5)  && is8) goto exactly1;
   if (o == GOF(GPR6)  && is8) goto exactly1;
   if (o == GOF(GPR7)  && is8) goto exactly1;
   if (o == GOF(GPR8)  && is8) goto exactly1;
   if (o == GOF(GPR9)  && is8) goto exactly1;
   if (o == GOF(GPR10) && is8) goto exactly1;
   if (o == GOF(GPR11) && is8) goto exactly1;
   if (o == GOF(GPR12) && is8) goto exactly1;
   if (o == GOF(GPR13) && is8) goto exactly1;
   if (o == GOF(GPR14) && is8) goto exactly1;
   if (o == GOF(GPR15) && is8) goto exactly1;
   if (o == GOF(GPR16) && is8) goto exactly1;
   if (o == GOF(GPR17) && is8) goto exactly1;
   if (o == GOF(GPR18) && is8) goto exactly1;
   if (o == GOF(GPR19) && is8) goto exactly1;
   if (o == GOF(GPR20) && is8) goto exactly1;
   if (o == GOF(GPR21) && is8) goto exactly1;
   if (o == GOF(GPR22) && is8) goto exactly1;
   if (o == GOF(GPR23) && is8) goto exactly1;
   if (o == GOF(GPR24) && is8) goto exactly1;
   if (o == GOF(GPR25) && is8) goto exactly1;
   if (o == GOF(GPR26) && is8) goto exactly1;
   if (o == GOF(GPR27) && is8) goto exactly1;
   if (o == GOF(GPR28) && is8) goto exactly1;
   if (o == GOF(GPR29) && is8) goto exactly1;
   if (o == GOF(GPR30) && is8) goto exactly1;
   if (o == GOF(GPR31) && is8) goto exactly1;

   /* Misc integer reg and condition code accesses */
   if (o == GOF(LR)        && is8) goto exactly1;
   if (o == GOF(CTR)       && is8) goto exactly1;
   if (o == GOF(CIA)       && is8) goto none;
   if (o == GOF(CIA_AT_SC) && is8) goto none;
   if (o == GOF(RESVN)     && is8) goto none;
   if (o == GOF(TISTART)   && is8) goto none;
   if (o == GOF(TILEN)     && is8) goto none;
   if (o == GOF(REDIR_SP)  && is8) goto none;

   if (sz == 1) {
      if (o == GOF(XER_SO))  goto none;
      if (o == GOF(XER_OV))  goto none;
      if (o == GOF(XER_CA))  goto none;
      if (o == GOF(XER_BC))  goto none;
      if (o == GOF(CR0_321)) goto none;
      if (o == GOF(CR0_0))   goto none;
      if (o == GOF(CR1_321)) goto none;
      if (o == GOF(CR1_0))   goto none;
      if (o == GOF(CR2_321)) goto none;
      if (o == GOF(CR2_0))   goto none;
      if (o == GOF(CR3_321)) goto none;
      if (o == GOF(CR3_0))   goto none;
      if (o == GOF(CR4_321)) goto none;
      if (o == GOF(CR4_0))   goto none;
      if (o == GOF(CR5_321)) goto none;
      if (o == GOF(CR5_0))   goto none;
      if (o == GOF(CR6_321)) goto none;
      if (o == GOF(CR6_0))   goto none;
      if (o == GOF(CR7_321)) goto none;
      if (o == GOF(CR7_0))   goto none;
   }

   /* Exact accesses to FP registers */
   if (o == GOF(FPR0)  && is8) goto none;
   if (o == GOF(FPR1)  && is8) goto none;
   if (o == GOF(FPR2)  && is8) goto none;
   if (o == GOF(FPR3)  && is8) goto none;
   if (o == GOF(FPR4)  && is8) goto none;
   if (o == GOF(FPR5)  && is8) goto none;
   if (o == GOF(FPR6)  && is8) goto none;
   if (o == GOF(FPR7)  && is8) goto none;
   if (o == GOF(FPR8)  && is8) goto none;
   if (o == GOF(FPR9)  && is8) goto none;
   if (o == GOF(FPR10) && is8) goto none;
   if (o == GOF(FPR11) && is8) goto none;
   if (o == GOF(FPR12) && is8) goto none;
   if (o == GOF(FPR13) && is8) goto none;
   if (o == GOF(FPR14) && is8) goto none;
   if (o == GOF(FPR15) && is8) goto none;
   if (o == GOF(FPR16) && is8) goto none;
   if (o == GOF(FPR17) && is8) goto none;
   if (o == GOF(FPR18) && is8) goto none;
   if (o == GOF(FPR19) && is8) goto none;
   if (o == GOF(FPR20) && is8) goto none;
   if (o == GOF(FPR21) && is8) goto none;
   if (o == GOF(FPR22) && is8) goto none;
   if (o == GOF(FPR23) && is8) goto none;
   if (o == GOF(FPR24) && is8) goto none;
   if (o == GOF(FPR25) && is8) goto none;
   if (o == GOF(FPR26) && is8) goto none;
   if (o == GOF(FPR27) && is8) goto none;
   if (o == GOF(FPR28) && is8) goto none;
   if (o == GOF(FPR29) && is8) goto none;
   if (o == GOF(FPR30) && is8) goto none;
   if (o == GOF(FPR31) && is8) goto none;

   /* FP admin related */
   if (o == GOF(FPROUND) && is4) goto none;
   if (o == GOF(EMWARN)  && is4) goto none;

   /* Altivec registers */
   if (o == GOF(VR0)  && sz == 16) goto none;
   if (o == GOF(VR1)  && sz == 16) goto none;
   if (o == GOF(VR2)  && sz == 16) goto none;
   if (o == GOF(VR3)  && sz == 16) goto none;
   if (o == GOF(VR4)  && sz == 16) goto none;
   if (o == GOF(VR5)  && sz == 16) goto none;
   if (o == GOF(VR6)  && sz == 16) goto none;
   if (o == GOF(VR7)  && sz == 16) goto none;
   if (o == GOF(VR8)  && sz == 16) goto none;
   if (o == GOF(VR9)  && sz == 16) goto none;
   if (o == GOF(VR10) && sz == 16) goto none;
   if (o == GOF(VR11) && sz == 16) goto none;
   if (o == GOF(VR12) && sz == 16) goto none;
   if (o == GOF(VR13) && sz == 16) goto none;
   if (o == GOF(VR14) && sz == 16) goto none;
   if (o == GOF(VR15) && sz == 16) goto none;
   if (o == GOF(VR16) && sz == 16) goto none;
   if (o == GOF(VR17) && sz == 16) goto none;
   if (o == GOF(VR18) && sz == 16) goto none;
   if (o == GOF(VR19) && sz == 16) goto none;
   if (o == GOF(VR20) && sz == 16) goto none;
   if (o == GOF(VR21) && sz == 16) goto none;
   if (o == GOF(VR22) && sz == 16) goto none;
   if (o == GOF(VR23) && sz == 16) goto none;
   if (o == GOF(VR24) && sz == 16) goto none;
   if (o == GOF(VR25) && sz == 16) goto none;
   if (o == GOF(VR26) && sz == 16) goto none;
   if (o == GOF(VR27) && sz == 16) goto none;
   if (o == GOF(VR28) && sz == 16) goto none;
   if (o == GOF(VR29) && sz == 16) goto none;
   if (o == GOF(VR30) && sz == 16) goto none;
   if (o == GOF(VR31) && sz == 16) goto none;

   VG_(printf)("get_IntRegInfo(ppc64):failing on (%d,%d)\n", o, sz);
   tl_assert(0);
#  undef GOF


#  else
#    error "FIXME: not implemented for this architecture"
#  endif

  exactly1:
   /* Slice is exactly one whole integer register. */
   iii->n_offsets = -1;
   return;
  none:
   /* Slice overlaps no integer registers. */
   iii->n_offsets = 0;
   return;
  contains_o:
   /* Slice is a sub-part of one register; the earlier 'o -= ...'
      adjustments normalised 'o' to that register's base offset. */
   tl_assert(o >= 0 && 0 == (o % sizeof(UWord)));
   iii->n_offsets = 1;
   iii->offsets[0] = o;
   return;
}
+
+
+/* Does 'arr' describe an indexed guest state section containing host
+   words, that we want to shadow? */
+
static Bool is_integer_guest_reg_array ( IRRegArray* arr )
{
   /* Returns True for indexed guest-state sections holding host words
      we want to shadow (the ppc32/ppc64 redirection stacks), False
      for recognised non-integer sections (x87 tag/register arrays on
      x86/amd64), and asserts on anything unrecognised. */
   /* --------------------- x86 --------------------- */
#  if defined(VGA_x86)
   /* The x87 tag array. */
   if (arr->base == offsetof(VexGuestX86State,guest_FPTAG[0])
       && arr->elemTy == Ity_I8 && arr->nElems == 8)
      return False;
   /* The x87 register array. */
   if (arr->base == offsetof(VexGuestX86State,guest_FPREG[0])
       && arr->elemTy == Ity_F64 && arr->nElems == 8)
      return False;

   VG_(printf)("is_integer_guest_reg_array(x86): unhandled: ");
   ppIRRegArray(arr);
   VG_(printf)("\n");
   tl_assert(0);

   /* -------------------- amd64 -------------------- */
#  elif defined(VGA_amd64)
   /* The x87 tag array. */
   if (arr->base == offsetof(VexGuestAMD64State,guest_FPTAG[0])
       && arr->elemTy == Ity_I8 && arr->nElems == 8)
      return False;
   /* The x87 register array. */
   if (arr->base == offsetof(VexGuestAMD64State,guest_FPREG[0])
       && arr->elemTy == Ity_F64 && arr->nElems == 8)
      return False;

   VG_(printf)("is_integer_guest_reg_array(amd64): unhandled: ");
   ppIRRegArray(arr);
   VG_(printf)("\n");
   tl_assert(0);

   /* -------------------- ppc32 -------------------- */
#  elif defined(VGA_ppc32)
   /* The redir stack. */
   if (arr->base == offsetof(VexGuestPPC32State,guest_REDIR_STACK[0])
       && arr->elemTy == Ity_I32
       && arr->nElems == VEX_GUEST_PPC32_REDIR_STACK_SIZE)
      return True;

   VG_(printf)("is_integer_guest_reg_array(ppc32): unhandled: ");
   ppIRRegArray(arr);
   VG_(printf)("\n");
   tl_assert(0);

   /* -------------------- ppc64 -------------------- */
#  elif defined(VGA_ppc64)
   /* The redir stack. */
   if (arr->base == offsetof(VexGuestPPC64State,guest_REDIR_STACK[0])
       && arr->elemTy == Ity_I64
       && arr->nElems == VEX_GUEST_PPC64_REDIR_STACK_SIZE)
      return True;

   VG_(printf)("is_integer_guest_reg_array(ppc64): unhandled: ");
   ppIRRegArray(arr);
   VG_(printf)("\n");
   tl_assert(0);

#  else
#    error "FIXME: not implemented for this architecture"
#  endif
}
+
+
+// END move this uglyness to pc_machine.c
+
+/* returns True iff given slice exactly matches an int reg.  Merely
+   a convenience wrapper around get_IntRegInfo. */
+static Bool is_integer_guest_reg ( Int offset, Int szB )
+{
+   /* get_IntRegInfo encodes "slice exactly matches one int register"
+      as n_offsets == -1; anything in 0..N_INTREGINFO_OFFSETS means a
+      partial/multiple overlap, which we report as False. */
+   IntRegInfo iii;
+   get_IntRegInfo( &iii, offset, szB );
+   tl_assert(iii.n_offsets >= -1 && iii.n_offsets <= N_INTREGINFO_OFFSETS);
+   return iii.n_offsets == -1;
+}
+
+/* these assume guest and host have the same endianness and
+   word size (probably). */
+static UWord get_guest_intreg ( ThreadId tid, Int shadowNo,
+                                OffT offset, SizeT size )
+{
+   /* Read one word from (shadow) register area 'shadowNo' (0 = real
+      guest state, 1 = first shadow) of thread 'tid' at 'offset'.
+      Only whole, word-aligned words are supported. */
+   UChar tmp[ 2 + sizeof(UWord) ];
+   tl_assert(size == sizeof(UWord));
+   tl_assert(0 == (offset % sizeof(UWord)));
+   VG_(memset)(tmp, 0, sizeof(tmp));
+   /* Canary bytes around the destination, re-checked after the call
+      to catch any overrun by VG_(get_shadow_regs_area). */
+   tmp[0] = 0x31;
+   tmp[ sizeof(tmp)-1 ] = 0x27;
+   VG_(get_shadow_regs_area)(tid, &tmp[1], shadowNo, offset, size);
+   tl_assert(tmp[0] == 0x31);
+   tl_assert(tmp[ sizeof(tmp)-1 ] == 0x27);
+   /* NOTE(review): &tmp[1] is deliberately unaligned; this load is
+      undefined behaviour on strict-alignment targets -- confirm all
+      supported hosts tolerate it, or copy out with VG_(memcpy). */
+   return * ((UWord*) &tmp[1] ); /* MISALIGNED LOAD */
+}
+static void put_guest_intreg ( ThreadId tid, Int shadowNo,
+                               OffT offset, SizeT size, UWord w )
+{
+   /* Write word 'w' into (shadow) register area 'shadowNo' of thread
+      'tid' at 'offset'.  Counterpart to get_guest_intreg; same
+      whole-word, word-aligned restriction. */
+   tl_assert(size == sizeof(UWord));
+   tl_assert(0 == (offset % sizeof(UWord)));
+   VG_(set_shadow_regs_area)(tid, shadowNo, offset, size,
+                             (const UChar*)&w);
+}
+
+/* Initialise the integer shadow registers to UNKNOWN.  This is a bit
+   of a nasty kludge, but it does mean we don't need to know which
+   registers we really need to initialise -- simply assume that all
+   integer registers will be naturally aligned w.r.t. the start of the
+   guest state, and fill in all possible entries. */
+static void init_shadow_registers ( ThreadId tid )
+{
+   /* Fill the first shadow register area with UNKNOWN, one aligned
+      word at a time, over the whole guest state. */
+   Int i, wordSzB = sizeof(UWord);
+   /* NOTE(review): the bound 'i < MC_SIZEOF_GUEST_STATE-wordSzB'
+      appears to leave the final word of the guest state
+      uninitialised (loop stops one word early) -- confirm whether
+      '<=' was intended. */
+   for (i = 0; i < MC_SIZEOF_GUEST_STATE-wordSzB; i += wordSzB) {
+      put_guest_intreg( tid, 1, i, wordSzB, (UWord)UNKNOWN );
+   }
+}
+
+static void post_reg_write_nonptr ( ThreadId tid, OffT offset, SizeT size )
+{
+   /* Set the shadow of the written register slice to NONPTR.  The
+      slice must exactly match an integer register, else we abort. */
+   // syscall_return: Default is non-pointer.  If it really is a pointer
+   // (eg. for mmap()), SK_(post_syscall) sets it again afterwards.
+   //
+   // clientreq_return: All the global client requests return non-pointers
+   // (except possibly CLIENT_CALL[0123], but they're handled by
+   // post_reg_write_clientcall, not here).
+   //
+   if (is_integer_guest_reg( (Int)offset, (Int)size )) {
+      put_guest_intreg( tid, 1, offset, size, (UWord)NONPTR );
+   } else {
+      /* Partial or non-integer-register writes are unexpected here. */
+      tl_assert(0);
+   }
+   //   VG_(set_thread_shadow_archreg)( tid, reg, (UInt)NONPTR );
+}
+
+static void post_reg_write_nonptr_or_unknown ( ThreadId tid,
+                                               OffT offset, SizeT size )
+{
+   /* Re-derive the shadow of a just-written register from its actual
+      value: NONPTR if it doesn't look like a pointer, else UNKNOWN. */
+   // deliver_signal: called from two places; one sets the reg to zero, the
+   // other sets the stack pointer.
+   //
+   if (is_integer_guest_reg( (Int)offset, (Int)size )) {
+      /* Read the real (shadowno 0) value, classify it, and store the
+         classification into the first shadow area. */
+      put_guest_intreg(
+         tid, 1/*shadowno*/, offset, size,
+         (UWord)nonptr_or_unknown( 
+                   get_guest_intreg( tid, 0/*shadowno*/,
+                                     offset, size )));
+   } else {
+      tl_assert(0);
+   }
+}
+
+/* Core callback: a register was written by core event 'part'; update
+   its shadow accordingly. */
+void h_post_reg_write_demux ( CorePart part, ThreadId tid,
+                              OffT guest_state_offset, SizeT size)
+{
+   if (0)
+   VG_(printf)("post_reg_write_demux: tid %d part %d off %ld size %ld\n",
+               (Int)tid, (Int)part,
+              guest_state_offset, size);
+   switch (part) {
+      case Vg_CoreStartup:
+         /* This is a bit of a kludge since for any Vg_CoreStartup
+            event we overwrite the entire shadow register set.  But
+            that's ok - we're only called once with
+            part==Vg_CoreStartup event, and in that case the supplied
+            offset & size cover the entire guest state anyway. */
+         init_shadow_registers(tid);
+         break;
+      case Vg_CoreSysCall:
+         if (0) VG_(printf)("ZZZZZZZ p_r_w    -> NONPTR\n");
+         post_reg_write_nonptr( tid, guest_state_offset, size );
+         break;
+      case Vg_CoreClientReq:
+         post_reg_write_nonptr( tid, guest_state_offset, size );
+         break;
+      case Vg_CoreSignal:
+         post_reg_write_nonptr_or_unknown( tid, guest_state_offset, size );
+         break;
+      default:
+         /* Unknown CorePart: no policy defined, so abort. */
+         tl_assert(0);
+   }
+}
+
+/* Core callback: client-call 'f' just returned into the register
+   slice [guest_state_offset, +size).  Set that slice's shadow based
+   on which replacement function 'f' was. */
+void h_post_reg_write_clientcall(ThreadId tid, OffT guest_state_offset,
+                                 SizeT size, Addr f )
+{
+   UWord p;
+
+   // Having to do this is a bit nasty...
+   if (f == (Addr)h_replace_malloc
+       || f == (Addr)h_replace___builtin_new
+       || f == (Addr)h_replace___builtin_vec_new
+       || f == (Addr)h_replace_calloc
+       || f == (Addr)h_replace_memalign
+       || f == (Addr)h_replace_realloc)
+   {
+      // We remembered the last added segment;  make sure it's the right one.
+      /* What's going on: at this point, the scheduler has just called
+         'f' -- one of our malloc replacement functions -- and it has
+         returned.  The return value has been written to the guest
+         state of thread 'tid', offset 'guest_state_offset' length
+         'size'.  We need to look at that return value and set the
+         shadow return value accordingly.  The shadow return value
+         required is handed to us "under the counter" through the
+         global variable 'last_seg_added'.  This is all very ugly, not
+         to mention, non-thread-safe should V ever become
+         multithreaded. */
+      /* assert the place where the return value is is a legit int reg */
+      tl_assert(is_integer_guest_reg(guest_state_offset, size));
+      /* Now we need to look at the returned value, to see whether the
+         malloc succeeded or not. */
+      p = get_guest_intreg(tid, 0/*non-shadow*/, guest_state_offset, size);
+      if ((UWord)NULL == p) {
+         // if alloc failed, eg. realloc on bogus pointer
+         put_guest_intreg(tid, 1/*first-shadow*/,
+                          guest_state_offset, size, (UWord)NONPTR );
+      } else {
+         // alloc didn't fail.  Check we have the correct segment.
+         tl_assert(p == last_seg_added->addr);
+         /* The shadow of a successful allocation is the Seg* itself. */
+         put_guest_intreg(tid, 1/*first-shadow*/,
+                          guest_state_offset, size, (UWord)last_seg_added );
+      }
+   } 
+   else if (f == (Addr)h_replace_free
+            || f == (Addr)h_replace___builtin_delete
+            || f == (Addr)h_replace___builtin_vec_delete
+   //            || f == (Addr)VG_(cli_block_size)
+            || f == (Addr)VG_(message))
+   {
+      // Probably best to set the (non-existent!) return value to
+      // non-pointer.
+      tl_assert(is_integer_guest_reg(guest_state_offset, size));
+      put_guest_intreg(tid, 1/*first-shadow*/,
+                       guest_state_offset, size, (UWord)NONPTR );
+   }
+   else {
+      // Anything else, probably best to set return value to non-pointer.
+      //VG_(set_thread_shadow_archreg)(tid, reg, (UInt)UNKNOWN);
+      /* Unrecognised callee: print its name (if resolvable) and die. */
+      Char fbuf[100];
+      VG_(printf)("f = %#lx\n", f);
+      VG_(get_fnname)(f, fbuf, 100);
+      VG_(printf)("name = %s\n", fbuf);
+      VG_(tool_panic)("argh: clientcall");
+   }
+}
+
+
+//zz /*--------------------------------------------------------------------*/
+//zz /*--- Sanity checking                                              ---*/
+//zz /*--------------------------------------------------------------------*/
+//zz 
+//zz /* Check that nobody has spuriously claimed that the first or last 16
+//zz    pages (64 KB) of address space have become accessible.  Failure of
+//zz    the following do not per se indicate an internal consistency
+//zz    problem, but they are so likely to that we really want to know
+//zz    about it if so. */
+//zz Bool pc_replace_cheap_sanity_check ( void )
+//zz {
+//zz    if (IS_DISTINGUISHED_SM(primary_map[0])
+//zz        /* kludge: kernel drops a page up at top of address range for
+//zz           magic "optimized syscalls", so we can no longer check the
+//zz           highest page */
+//zz        /* && IS_DISTINGUISHED_SM(primary_map[65535]) */
+//zz       )
+//zz       return True;
+//zz    else
+//zz       return False;
+//zz }
+//zz 
+//zz Bool SK_(expensive_sanity_check) ( void )
+//zz {
+//zz    Int i;
+//zz 
+//zz    /* Make sure nobody changed the distinguished secondary. */
+//zz    for (i = 0; i < SEC_MAP_WORDS; i++)
+//zz       if (distinguished_secondary_map.vseg[i] != UNKNOWN)
+//zz          return False;
+//zz 
+//zz    return True;
+//zz }
+
+
+/*--------------------------------------------------------------------*/
+/*--- System calls                                                 ---*/
+/*--------------------------------------------------------------------*/
+
+/* Core callback, invoked just before syscall 'sysno' executes. */
+void h_pre_syscall ( ThreadId tid, UInt sysno )
+{
+   /* we don't do anything at the pre-syscall point */
+}
+
+/* The post-syscall table is a table of pairs (number, flag).
+
+   'flag' is only ever zero or one.  If it is zero, it indicates that
+   default handling for that syscall is required -- namely that the
+   syscall is deemed to return NONPTR.  This is the case for the vast
+   majority of syscalls.  If it is one then some special
+   syscall-specific handling is required.  No further details of it
+   are stored in the table.
+
+   On Linux, 'number' is a __NR_xxx constant.
+
+   On AIX5, 'number' is an Int*, which points to the Int variable
+   holding the currently assigned number for this syscall.
+
+   When querying the table, we compare the supplied syscall number
+   with the 'number' field (directly on Linux, after dereferencing on
+   AIX5), to find the relevant entry.  This requires a linear search
+   of the table.  To stop the costs getting too high, the table is
+   incrementally rearranged after each search, to move commonly
+   requested items a bit closer to the front.
+
+   The table is built once, the first time it is used.  After that we
+   merely query it (and reorder the entries as a result). */
+
+/* Lazily-built table of (syscall number, needs-special-handling flag)
+   pairs; see the block comment above for the encoding. */
+static XArray* /* of UWordPair */ post_syscall_table = NULL;
+
+/* Build post_syscall_table.  Called exactly once, on first use, from
+   h_post_syscall.  uw1 = syscall number (Linux) or pointer to it
+   (AIX5); uw2 = 0 for "default NONPTR return", 1 for special-case. */
+static void setup_post_syscall_table ( void )
+{
+   tl_assert(!post_syscall_table);
+   post_syscall_table = VG_(newXA)( VG_(malloc), "pc.h_main.spst.1",
+                                    VG_(free), sizeof(UWordPair) );
+   tl_assert(post_syscall_table);
+
+   /* --------------- LINUX --------------- */
+
+#  if defined(VGO_linux)
+
+#     define ADD(_flag, _syscallname) \
+         do { UWordPair p; p.uw1 = (_syscallname); p.uw2 = (_flag); \
+              VG_(addToXA)( post_syscall_table, &p ); \
+         } while (0)
+
+      /* These ones definitely don't return pointers.  They're not
+         particularly grammatical, either. */
+
+#     if defined(__NR__llseek)
+      ADD(0, __NR__llseek);
+#     endif
+      ADD(0, __NR__sysctl);
+#     if defined(__NR__newselect)
+      ADD(0, __NR__newselect);
+#     endif
+#     if defined(__NR_accept)
+      ADD(0, __NR_accept);
+#     endif
+      ADD(0, __NR_access);
+#     if defined(__NR_bind)
+      ADD(0, __NR_bind);
+#     endif
+#     if defined(__NR_chdir)
+      ADD(0, __NR_chdir);
+#     endif
+      ADD(0, __NR_chmod);
+      ADD(0, __NR_chown);
+      ADD(0, __NR_clock_getres);
+      ADD(0, __NR_clock_gettime);
+      ADD(0, __NR_clone);
+      ADD(0, __NR_close);
+#     if defined(__NR_connect)
+      ADD(0, __NR_connect);
+#     endif
+      ADD(0, __NR_dup);
+      ADD(0, __NR_dup2);
+      ADD(0, __NR_execve); /* presumably we see this because the call failed? */
+      ADD(0, __NR_exit); /* hmm, why are we still alive? */
+      ADD(0, __NR_exit_group);
+      ADD(0, __NR_fadvise64);
+      ADD(0, __NR_fchmod);
+      ADD(0, __NR_fchown);
+#     if defined(__NR_fchown32)
+      ADD(0, __NR_fchown32);
+#     endif
+      ADD(0, __NR_fcntl);
+#     if defined(__NR_fcntl64)
+      ADD(0, __NR_fcntl64);
+#     endif
+      ADD(0, __NR_fdatasync);
+      ADD(0, __NR_fstat);
+#     if defined(__NR_fstat64)
+      ADD(0, __NR_fstat64);
+#     endif
+      ADD(0, __NR_fstatfs);
+      ADD(0, __NR_fsync);
+      ADD(0, __NR_ftruncate);
+#     if defined(__NR_ftruncate64)
+      ADD(0, __NR_ftruncate64);
+#     endif
+      ADD(0, __NR_futex);
+      ADD(0, __NR_getcwd);
+      ADD(0, __NR_getdents); // something to do with teeth
+      ADD(0, __NR_getdents64);
+      ADD(0, __NR_getegid);
+#     if defined(__NR_getegid32)
+      ADD(0, __NR_getegid32);
+#     endif
+      ADD(0, __NR_geteuid);
+#     if defined(__NR_geteuid32)
+      ADD(0, __NR_geteuid32);
+#     endif
+      ADD(0, __NR_getgid);
+#     if defined(__NR_getgid32)
+      ADD(0, __NR_getgid32);
+#     endif
+      ADD(0, __NR_getitimer);
+#     if defined(__NR_getpeername)
+      ADD(0, __NR_getpeername);
+#     endif
+      ADD(0, __NR_getpid);
+      ADD(0, __NR_getppid);
+      ADD(0, __NR_getresgid);
+      ADD(0, __NR_getresuid);
+      ADD(0, __NR_getrlimit);
+#     if defined(__NR_getsockname)
+      ADD(0, __NR_getsockname);
+#     endif
+#     if defined(__NR_getsockopt)
+      ADD(0, __NR_getsockopt);
+#     endif
+      ADD(0, __NR_gettimeofday);
+      ADD(0, __NR_getuid);
+#     if defined(__NR_getuid32)
+      ADD(0, __NR_getuid32);
+#     endif
+      ADD(0, __NR_getxattr);
+      ADD(0, __NR_inotify_init);
+      ADD(0, __NR_ioctl); // ioctl -- assuming no pointers returned
+      ADD(0, __NR_kill);
+      ADD(0, __NR_link);
+#     if defined(__NR_listen)
+      ADD(0, __NR_listen);
+#     endif
+      ADD(0, __NR_lseek);
+      ADD(0, __NR_lstat);
+#     if defined(__NR_lstat64)
+      ADD(0, __NR_lstat64);
+#     endif
+      ADD(0, __NR_madvise);
+      ADD(0, __NR_mkdir);
+      ADD(0, __NR_mprotect);
+      ADD(0, __NR_munmap); // die_mem_munmap already called, segment removed
+      ADD(0, __NR_open);
+      ADD(0, __NR_pipe);
+      ADD(0, __NR_poll);
+      ADD(0, __NR_pread64);
+      ADD(0, __NR_pwrite64);
+      ADD(0, __NR_read);
+      ADD(0, __NR_readlink);
+      ADD(0, __NR_readv);
+#     if defined(__NR_recvfrom)
+      ADD(0, __NR_recvfrom);
+#     endif
+#     if defined(__NR_recvmsg)
+      ADD(0, __NR_recvmsg);
+#     endif
+      ADD(0, __NR_rename);
+      ADD(0, __NR_rmdir);
+      ADD(0, __NR_rt_sigaction);
+      ADD(0, __NR_rt_sigprocmask);
+      ADD(0, __NR_rt_sigreturn); /* not sure if we should see this or not */
+      ADD(0, __NR_sched_get_priority_max);
+      ADD(0, __NR_sched_get_priority_min);
+      ADD(0, __NR_sched_getparam);
+      ADD(0, __NR_sched_getscheduler);
+      ADD(0, __NR_sched_setscheduler);
+      ADD(0, __NR_sched_yield);
+      ADD(0, __NR_select);
+#     if defined(__NR_sendto)
+      ADD(0, __NR_sendto);
+#     endif
+      ADD(0, __NR_set_robust_list);
+#     if defined(__NR_set_thread_area)
+      ADD(0, __NR_set_thread_area);
+#     endif
+      ADD(0, __NR_set_tid_address);
+      ADD(0, __NR_setitimer);
+      ADD(0, __NR_setrlimit);
+      ADD(0, __NR_setsid);
+#     if defined(__NR_setsockopt)
+      ADD(0, __NR_setsockopt);
+#     endif
+#     if defined(__NR_shmctl)
+      ADD(0, __NR_shmctl);
+      ADD(0, __NR_shmdt);
+#     endif
+#     if defined(__NR_shutdown)
+      ADD(0, __NR_shutdown);
+#     endif
+#     if defined(__NR_socket)
+      ADD(0, __NR_socket);
+#     endif
+#     if defined(__NR_socketcall)
+      ADD(0, __NR_socketcall); /* the nasty x86-linux socket multiplexor */
+#     endif
+#     if defined(__NR_statfs64)
+      ADD(0, __NR_statfs64);
+#     endif
+#     if defined(__NR_sigreturn)
+      ADD(0, __NR_sigreturn); /* not sure if we should see this or not */
+#     endif
+#     if defined(__NR_stat64)
+      ADD(0, __NR_stat64);
+#     endif
+      ADD(0, __NR_stat);
+      ADD(0, __NR_statfs);
+      ADD(0, __NR_symlink);
+      ADD(0, __NR_sysinfo);
+      ADD(0, __NR_tgkill);
+      ADD(0, __NR_time);
+      ADD(0, __NR_times);
+      ADD(0, __NR_truncate);
+#     if defined(__NR_truncate64)
+      ADD(0, __NR_truncate64);
+#     endif
+#     if defined(__NR_ugetrlimit)
+      ADD(0, __NR_ugetrlimit);
+#     endif
+      ADD(0, __NR_umask);
+      ADD(0, __NR_uname);
+      ADD(0, __NR_unlink);
+      ADD(0, __NR_utime);
+#     if defined(__NR_waitpid)
+      ADD(0, __NR_waitpid);
+#     endif
+      ADD(0, __NR_wait4);
+      ADD(0, __NR_write);
+      ADD(0, __NR_writev);
+
+      /* Whereas the following need special treatment */
+#     if defined(__NR_arch_prctl)
+      ADD(1, __NR_arch_prctl);
+#     endif
+      ADD(1, __NR_brk);
+      ADD(1, __NR_mmap);
+#     if defined(__NR_mmap2)
+      ADD(1, __NR_mmap2);
+#     endif
+#     if defined(__NR_shmat)
+      ADD(1, __NR_shmat);
+#     endif
+#     if defined(__NR_shmget)
+      ADD(1, __NR_shmget);
+#     endif
+
+   /* --------------- AIX5 --------------- */
+
+#  elif defined(VGO_aix5)
+
+      /* AIX5 syscall numbers are assigned at runtime; store the
+         address of the number variable, and skip syscalls whose
+         number is still __NR_AIX5_UNKNOWN (i.e. not assigned). */
+#     define ADD(_flag, _syscallname) \
+         do { \
+            UWordPair p; \
+            if ((_syscallname) != __NR_AIX5_UNKNOWN) { \
+               p.uw1 = (UWord)&(_syscallname); p.uw2 = (_flag); \
+               VG_(addToXA)( post_syscall_table, &p ); \
+            } \
+         } while (0)
+
+      /* Just a minimal set of handlers, enough to make
+         a 32- and 64-bit hello-world program run. */
+      ADD(1, __NR_AIX5___loadx); /* not sure what to do here */
+      ADD(0, __NR_AIX5__exit);
+      ADD(0, __NR_AIX5_access);
+      ADD(0, __NR_AIX5_getgidx);
+      ADD(0, __NR_AIX5_getuidx);
+      ADD(0, __NR_AIX5_kfcntl);
+      ADD(0, __NR_AIX5_kioctl);
+      ADD(1, __NR_AIX5_kload); /* not sure what to do here */
+      ADD(0, __NR_AIX5_kwrite);
+
+#  else
+#     error "Unsupported OS"
+#  endif
+
+#  undef ADD
+}
+
+
+/* Core callback, invoked after syscall 'sysno' completes with result
+   'res'.  Looks 'sysno' up in post_syscall_table and sets the shadow
+   of the syscall return value accordingly; unlisted syscalls are a
+   fatal error. */
+void h_post_syscall ( ThreadId tid, UInt sysno, SysRes res )
+{
+   Word i, n;
+   UWordPair* pair;
+
+   if (!post_syscall_table)
+      setup_post_syscall_table();
+
+   /* search for 'sysno' in the post_syscall_table */
+   n = VG_(sizeXA)( post_syscall_table );
+   for (i = 0; i < n; i++) {
+      pair = VG_(indexXA)( post_syscall_table, i );
+#     if defined(VGO_linux)
+      if (pair->uw1 == (UWord)sysno)
+         break;
+#     elif defined(VGO_aix5)
+      /* uw1 holds the address of the (runtime-assigned) number. */
+      if (*(Int*)(pair->uw1) == (Int)sysno)
+         break;
+#     else
+#        error "Unsupported OS"
+#     endif
+   }
+
+   tl_assert(i >= 0 && i <= n);
+
+   if (i == n) {
+      /* Not in the table: we have no policy for this syscall. */
+      VG_(printf)("sysno == %u\n", sysno);
+#     if defined(VGO_aix5)
+      VG_(printf)("syscallnm == %s\n",
+                  VG_(aix5_sysno_to_sysname)(sysno));
+#     endif
+      VG_(tool_panic)("unhandled syscall");
+   }
+
+   /* So we found the relevant entry.  Move it one step
+      forward so as to speed future accesses to it. */
+   if (i > 0) {
+      UWordPair tmp, *p, *q;
+      p = VG_(indexXA)( post_syscall_table, i-1 );
+      q = VG_(indexXA)( post_syscall_table, i-0 );
+      tmp = *p;
+      *p = *q;
+      *q = tmp;
+      i--;
+   }
+
+   /* Deal with the common case */
+   pair = VG_(indexXA)( post_syscall_table, i );
+   if (pair->uw2 == 0) {
+     /* the common case */
+      VG_(set_syscall_return_shadows)( 
+         tid, /* retval */ (UWord)NONPTR, 0,
+              /* error */  (UWord)NONPTR, 0
+      );
+      return;
+   }
+
+   /* Special handling for all remaining cases */
+   tl_assert(pair->uw2 == 1);
+
+#  if defined(__NR_arch_prctl)
+   if (sysno == __NR_arch_prctl) {
+      /* This is nasty.  On amd64-linux, arch_prctl may write a
+         value to guest_FS_ZERO, and we need to shadow that value.
+         Hence apply nonptr_or_unknown to it here, after the
+         syscall completes. */
+      post_reg_write_nonptr_or_unknown( tid, PC_OFF_FS_ZERO, 
+                                             PC_SZB_FS_ZERO );
+      VG_(set_syscall_return_shadows)( 
+         tid, /* retval */ (UWord)NONPTR, 0,
+              /* error */  (UWord)NONPTR, 0
+      );
+      return;
+   }
+#  endif
+
+#  if defined(__NR_brk)
+   // With brk(), result (of kernel syscall, not glibc wrapper) is a heap
+   // pointer.  Make the shadow UNKNOWN.
+   if (sysno ==  __NR_brk) {
+      VG_(set_syscall_return_shadows)( 
+         tid, /* retval */ (UWord)UNKNOWN, 0,
+              /* error */  (UWord)NONPTR,  0
+      );
+      return;
+   }
+#  endif
+
+   // With mmap, new_mem_mmap() has already been called and added the
+   // segment (we did it there because we had the result address and size
+   // handy).  So just set the return value shadow.
+   /* NOTE(review): unlike brk/arch_prctl above, __NR_mmap is
+      referenced here without a defined() guard -- confirm it exists
+      on every supported platform (e.g. AIX5). */
+   if (sysno == __NR_mmap
+#      if defined(__NR_mmap2)
+       || sysno == __NR_mmap2
+#      endif
+#      if defined(__NR_AIX5___loadx)
+       || (sysno == __NR_AIX5___loadx && __NR_AIX5___loadx != __NR_AIX5_UNKNOWN)
+#      endif
+#      if defined(__NR_AIX5_kload)
+       || (sysno == __NR_AIX5_kload && __NR_AIX5_kload != __NR_AIX5_UNKNOWN)
+#      endif
+      ) {
+      if (res.isError) {
+         // mmap() had an error, return value is a small negative integer
+         VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)NONPTR, 0,
+                                               /*err*/ (UWord)NONPTR, 0 );
+         if (0) VG_(printf)("ZZZZZZZ mmap res -> NONPTR\n");
+      } else {
+         VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
+                                               /*err*/ (UWord)NONPTR, 0 );
+         if (0) VG_(printf)("ZZZZZZZ mmap res -> UNKNOWN\n");
+      }
+      return;
+   }
+
+   // shmat uses the same scheme.  We will just have had a
+   // notification via new_mem_mmap.  Just set the return value shadow.
+#  if defined(__NR_shmat)
+   if (sysno == __NR_shmat) {
+      if (res.isError) {
+         VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)NONPTR, 0,
+                                               /*err*/ (UWord)NONPTR, 0 );
+         if (0) VG_(printf)("ZZZZZZZ shmat res -> NONPTR\n");
+      } else {
+         VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
+                                               /*err*/ (UWord)NONPTR, 0 );
+         if (0) VG_(printf)("ZZZZZZZ shmat res -> UNKNOWN\n");
+      }
+      return;
+   }
+#  endif
+
+#  if defined(__NR_shmget)
+   if (sysno == __NR_shmget) {
+      // FIXME: is this correct?
+      VG_(set_syscall_return_shadows)( tid, /*val*/ (UWord)UNKNOWN, 0,
+                                            /*err*/ (UWord)NONPTR, 0 );
+      return;
+   }
+#  endif
+
+   /* If we get here, it implies the corresponding entry in
+      post_syscall_table has .w2 == 1, which in turn implies there
+      should be special-case code for it above. */
+   tl_assert(0);
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Functions called from generated code                         ---*/
+/*--------------------------------------------------------------------*/
+
+#if SC_SEGS
+/* Debug-only sanity check: a shadow value must be one of the three
+   distinguished tags or a plausible Seg. */
+static void checkSeg ( Seg vseg ) {
+   tl_assert(vseg == UNKNOWN || vseg == NONPTR || vseg == BOTTOM
+             || Seg__plausible(vseg) );
+}
+#endif
+
+// XXX: could be more sophisticated -- actually track the lowest/highest
+// valid address used by the program, and then return False for anything
+// below that (using a suitable safety margin).  Also, nothing above
+// 0xc0000000 is valid [unless you've changed that in your kernel]
+/* Cheap heuristic: does value 'a' fall in a plausible pointer range
+   for this word size?  Used to decide NONPTR vs UNKNOWN. */
+static inline Bool looks_like_a_pointer(Addr a)
+{
+#  if defined(VGA_x86) || defined(VGA_ppc32)
+   tl_assert(sizeof(UWord) == 4);
+   /* Reject the bottom 16MB and the top 16MB of the 32-bit space. */
+   return (a > 0x01000000UL && a < 0xFF000000UL);
+#  elif defined(VGA_amd64) || defined(VGA_ppc64)
+   tl_assert(sizeof(UWord) == 8);
+   /* Reject the bottom 1MB and the top 1/256th of the 64-bit space. */
+   return (a >= 16 * 0x10000UL && a < 0xFF00000000000000UL);
+#  else
+#    error "Unsupported architecture"
+#  endif
+}
+
+/* Classify a raw word: NONPTR if it cannot be a pointer, else
+   UNKNOWN (might point anywhere).  Called from generated code. */
+static inline VG_REGPARM(1)
+Seg* nonptr_or_unknown(UWord x)
+{
+   Seg* res = looks_like_a_pointer(x) ? UNKNOWN : NONPTR;
+   if (0) VG_(printf)("nonptr_or_unknown %s %#lx\n", 
+                      res==UNKNOWN ? "UUU" : "nnn", x);
+   return res;
+}
+
+//zz static __attribute__((regparm(1)))
+//zz void print_BB_entry(UInt bb)
+//zz {
+//zz    VG_(printf)("%u =\n", bb);
+//zz }
+
+/* Counters for --lossage-check: total memory references, those that
+   land inside a heap segment, and those whose pointer shadow had
+   been lost by then. */
+static ULong stats__tot_mem_refs  = 0;
+static ULong stats__refs_in_a_seg = 0;
+static ULong stats__refs_lost_seg = 0;
+
+/* One lossage record: an allocation context and how often shadow
+   info was lost for accesses within blocks allocated there. */
+typedef
+   struct { ExeContext* ec; UWord count; }
+   Lossage;
+
+/* OSet of Lossage, keyed on the ExeContext pointer; created lazily
+   by init_lossage. */
+static OSet* lossage = NULL;
+
+/* Bump the lossage count for allocation context 'ec', inserting a
+   fresh record (count 1) on first sight. */
+static void inc_lossage ( ExeContext* ec ) 
+{
+   Lossage key, *res, *nyu;
+   key.ec = ec;
+   key.count = 0; /* frivolous */
+   res = VG_(OSetGen_Lookup)(lossage, &key);
+   if (res) {
+      tl_assert(res->ec == ec);
+      res->count++;
+   } else {
+      /* Not seen before: allocate a node owned by the OSet. */
+      nyu = (Lossage*)VG_(OSetGen_AllocNode)(lossage, sizeof(Lossage));
+      tl_assert(nyu);
+      nyu->ec = ec;
+      nyu->count = 1;
+      VG_(OSetGen_Insert)( lossage, nyu );
+   }
+}
+
+/* Create the (initially empty) lossage OSet, keyed on the
+   ExeContext* field, using pointer comparison. */
+static void init_lossage ( void )
+{
+   lossage = VG_(OSetGen_Create)( /*keyOff*/ offsetof(Lossage,ec),
+                                  /*fastCmp*/NULL,
+                                  VG_(malloc), "pc.h_main.il.1",
+                                  VG_(free) );
+   tl_assert(lossage);
+}
+
+/* Report every lossage record with a count of at least 10, together
+   with its allocation context. */
+static void show_lossage ( void )
+{
+   Lossage* elem;
+   VG_(OSetGen_ResetIter)( lossage );
+   while ( (elem = VG_(OSetGen_Next)(lossage)) ) {
+      if (elem->count < 10) continue;
+      //Char buf[100];
+      //(void)VG_(describe_IP)(elem->ec, buf, sizeof(buf)-1);
+      //buf[sizeof(buf)-1] = 0;
+      //VG_(printf)("  %,8lu  %s\n", elem->count, buf);
+      VG_(message)(Vg_UserMsg, "Lossage count %'lu at", elem->count);
+      VG_(pp_ExeContext)(elem->ec);
+   }
+}
+
+// This function is called *a lot*; inlining it sped up Konqueror by 20%.
+static inline
+void check_load_or_store(Bool is_write, Addr m, UWord sz, Seg* mptr_vseg)
+{
+   /* Core check: memory access of 'sz' bytes at 'm', made through a
+      pointer whose shadow is 'mptr_vseg'.  Reports a heap error when
+      the pointer demonstrably does not cover the accessed range. */
+   if (h_clo_lossage_check) {
+     tl_assert(0);
+#if 0
+      Seg* seg;
+      stats__tot_mem_refs++;
+      if (ISList__findI0( seglist, (Addr)m, &seg )) {
+         /* m falls inside 'seg' (that is, we are making a memory
+            reference inside 'seg').  Now, really mptr_vseg should be
+            a tracked segment of some description.  Badness is when
+            mptr_vseg is UNKNOWN, BOTTOM or NONPTR at this point,
+            since that means we've lost the type of it somehow: it
+            should say that m points into a real segment (preferably
+            'seg'), but it doesn't. */
+         if (Seg__status_is_SegHeap(seg)) {
+            stats__refs_in_a_seg++;
+            if (UNKNOWN == mptr_vseg
+                || BOTTOM == mptr_vseg || NONPTR == mptr_vseg) {
+               ExeContext* ec;
+               Char buf[100];
+               static UWord xx = 0;
+               stats__refs_lost_seg++;
+               ec = VG_(record_ExeContext)( VG_(get_running_tid)(), 0 );
+               inc_lossage(ec);
+               if (0) {
+                  VG_(message)(Vg_DebugMsg, "");
+                  VG_(message)(Vg_DebugMsg,
+                               "Lossage %s %#lx sz %lu inside block alloc'd",
+                               is_write ? "wr" : "rd", m, (UWord)sz);
+                  VG_(pp_ExeContext)(Seg__where(seg));
+               }
+               /* NOTE(review): xx is unsigned, so 'xx++ < 0' is
+                  always false -- this debug branch can never fire. */
+               if (xx++ < 0) {
+                  Addr ip = VG_(get_IP)( VG_(get_running_tid)() );
+                  (void)VG_(describe_IP)( ip, buf, sizeof(buf)-1);
+                  buf[sizeof(buf)-1] = 0;
+                  VG_(printf)("lossage at %p %s\n", ec, buf );
+               }
+            }
+         }
+      }
+#endif
+   } /* clo_lossage_check */
+
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+
+   if (UNKNOWN == mptr_vseg) {
+      // do nothing
+
+   } else if (BOTTOM == mptr_vseg) {
+      // do nothing
+
+   } else if (NONPTR == mptr_vseg) {
+      /* Access through a value known not to be a pointer: error. */
+      h_record_heap_error( m, sz, mptr_vseg, is_write );
+
+   } else {
+      // check all segment ranges in the circle
+      // if none match, warn about 1st seg
+      // else,          check matching one isn't freed
+      Bool is_ok = False;
+      Seg* curr  = mptr_vseg;
+      Addr mhi;
+
+      // Accesses partly outside range are an error, unless it's an aligned
+      // word-sized read, and --partial-loads-ok=yes.  This is to cope with
+      // gcc's/glibc's habits of doing word-sized accesses that read past
+      // the ends of arrays/strings.
+      // JRS 2008-sept-11: couldn't this be moved off the critical path?
+      if (!is_write && sz == sizeof(UWord)
+          && h_clo_partial_loads_ok && SHMEM_IS_WORD_ALIGNED(m)) {
+         /* Only require the first byte to be inside the block. */
+         mhi = m;
+      } else {
+         mhi = m+sz-1;
+      }
+
+      if (0) VG_(printf)("calling seg_ci %p %#lx %#lx\n", curr,m,mhi);
+      /* The whole range [m, mhi] must lie inside curr's block. */
+      is_ok = curr->addr <= m && mhi < curr->addr + curr->szB;
+
+      // If it's an overrun/underrun of a freed block, don't give both
+      // warnings, since the first one mentions that the block has been
+      // freed.
+      if ( ! is_ok || Seg__is_freed(curr) )
+         h_record_heap_error( m, sz, mptr_vseg, is_write );
+   }
+}
+
+// ------------------ Load handlers ------------------ //
+
+/* On 32 bit targets, we will use:
+      check_load1 check_load2 check_load4_P
+      check_load4  (for 32-bit FP reads)
+      check_load8  (for 64-bit FP reads)
+      check_load16 (for xmm/altivec reads)
+   On 64 bit targets, we will use:
+      check_load1 check_load2 check_load4 check_load8_P
+      check_load8  (for 64-bit FP reads)
+      check_load16 (for xmm/altivec reads)
+
+   A "_P" handler reads a pointer from memory, and so returns a value
+   to the generated code -- the pointer's shadow value.  That implies
+   that check_load4_P is only to be called on a 32 bit host and
+   check_load8_P is only to be called on a 64 bit host.  For all other
+   cases no shadow value is returned; we merely check that the pointer
+   (m) matches the block described by its shadow value (mptr_vseg).
+*/
+
+// This handles 128 bit loads on both 32 bit and 64 bit targets.
+static VG_REGPARM(2)
+void check_load16(Addr m, Seg* mptr_vseg)
+{
+   /* 16-byte (xmm/altivec) load: validity-check only, no shadow
+      value to return. */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 16, mptr_vseg);
+}
+
+// This handles 64 bit FP-or-otherwise-nonpointer loads on both
+// 32 bit and 64 bit targets.
+static VG_REGPARM(2)
+void check_load8(Addr m, Seg* mptr_vseg)
+{
+   /* 8-byte non-pointer (e.g. FP) load: validity-check only. */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
+}
+
+// This handles 64 bit loads on 64 bit targets.  It must
+// not be called on 32 bit targets.
+// return m.vseg
+static VG_REGPARM(2)
+Seg* check_load8_P(Addr m, Seg* mptr_vseg)
+{
+   /* 8-byte pointer load (64-bit hosts only): check the access, then
+      return the shadow of the loaded word to the generated code. */
+   Seg* vseg;
+   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
+   if (VG_IS_8_ALIGNED(m)) {
+      /* Aligned: the shadow memory map tracks this word directly. */
+      vseg = get_mem_vseg(m);
+   } else {
+      /* Misaligned: no tracked shadow; re-derive from the value. */
+      vseg = nonptr_or_unknown( *(ULong*)m );
+   }
+   return vseg;
+}
+
+// This handles 32 bit loads on 32 bit targets.  It must
+// not be called on 64 bit targets.
+// return m.vseg
+static VG_REGPARM(2)
+Seg* check_load4_P(Addr m, Seg* mptr_vseg)
+{
+   /* 4-byte pointer load (32-bit hosts only): check the access, then
+      return the shadow of the loaded word to the generated code. */
+   Seg* vseg;
+   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
+   if (VG_IS_4_ALIGNED(m)) {
+      /* Aligned: the shadow memory map tracks this word directly. */
+      vseg = get_mem_vseg(m);
+   } else {
+      /* Misaligned: no tracked shadow; re-derive from the value. */
+      vseg = nonptr_or_unknown( *(UInt*)m );
+   }
+   return vseg;
+}
+
+// Used for both 32 bit and 64 bit targets.
+static VG_REGPARM(2)
+void check_load4(Addr m, Seg* mptr_vseg)
+{
+   /* 4-byte non-pointer load: validity-check only. */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
+}
+
+// Used for both 32 bit and 64 bit targets.
+static VG_REGPARM(2)
+void check_load2(Addr m, Seg* mptr_vseg)
+{
+   /* 2-byte load: validity-check only. */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 2, mptr_vseg);
+}
+
+// Used for both 32 bit and 64 bit targets.
+static VG_REGPARM(2)
+void check_load1(Addr m, Seg* mptr_vseg)
+{
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/False, m, 1, mptr_vseg);
+}
+
+// ------------------ Store handlers ------------------ //
+
+/* On 32 bit targets, we will use:
+      check_store1 check_store2 check_store4_P
+      check_store4 (for 32-bit nonpointer stores)
+      check_store8_ms4B_ls4B (for 64-bit stores)
+      check_store16_ms4B_4B_4B_ls4B (for xmm/altivec stores)
+
+   On 64 bit targets, we will use:
+      check_store1 check_store2 check_store4 check_store8_P
+      check_store8_all8B (for 64-bit nonpointer stores)
+      check_store16_ms8B_ls8B (for xmm/altivec stores)
+
+   A "_P" handler writes a pointer to memory, and so has an extra
+   argument -- the pointer's shadow value.  That implies that
+   check_store4_P is only to be called on a 32 bit host and
+   check_store8_P is only to be called on a 64 bit host.  For all
+   other cases, and for the misaligned _P cases, the strategy is to
+   let the store go through, and then snoop around with
+   nonptr_or_unknown to fix up the shadow values of any affected
+   words. */
+
+/* Apply nonptr_or_unknown to all the words intersecting
+   [a, a+len).  Used by the store handlers below to repair the shadow
+   values of every aligned word a store may have touched.  'len' must
+   be >= 1, otherwise a+len-1 wraps and the assertion fails. */
+static VG_REGPARM(2)
+void nonptr_or_unknown_range ( Addr a, SizeT len )
+{
+   const SizeT wszB = sizeof(UWord);
+   Addr wfirst = VG_ROUNDDN(a,       wszB);   // first word intersected
+   Addr wlast  = VG_ROUNDDN(a+len-1, wszB);   // last word intersected
+   Addr a2;
+   tl_assert(wfirst <= wlast);
+   for (a2 = wfirst ; a2 <= wlast; a2 += wszB) {
+      set_mem_vseg( a2, nonptr_or_unknown( *(UWord*)a2 ));
+   }
+}
+
+// This handles 128 bit stores on 64 bit targets.  The
+// store data is passed in 2 pieces, the most significant
+// bits first.
+static VG_REGPARM(3)
+void check_store16_ms8B_ls8B(Addr m, Seg* mptr_vseg,
+                             UWord ms8B, UWord ls8B)
+{
+   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
+   // Actually *do* the STORE here
+   if (host_is_little_endian()) {
+      // FIXME: aren't we really concerned whether the guest
+      // is little endian, not whether the host is?
+      *(ULong*)(m + 0) = ls8B;
+      *(ULong*)(m + 8) = ms8B;
+   } else {
+      *(ULong*)(m + 0) = ms8B;
+      *(ULong*)(m + 8) = ls8B;
+   }
+   nonptr_or_unknown_range(m, 16);
+}
+
+// This handles 128 bit stores on 32 bit targets.  The
+// store data is passed in 4 pieces, the most significant
+// bits first.  (Nb: 6 args in total but only 3 regparms --
+// generated via gen_dirty_v_6W below.)
+static VG_REGPARM(3)
+void check_store16_ms4B_4B_4B_ls4B(Addr m, Seg* mptr_vseg,
+                                   UWord ms4B, UWord w2,
+                                   UWord w1,   UWord ls4B)
+{
+   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
+   // Actually *do* the STORE here
+   if (host_is_little_endian()) {
+      // FIXME: aren't we really concerned whether the guest
+      // is little endian, not whether the host is?
+      *(UInt*)(m +  0) = ls4B;
+      *(UInt*)(m +  4) = w1;
+      *(UInt*)(m +  8) = w2;
+      *(UInt*)(m + 12) = ms4B;
+   } else {
+      *(UInt*)(m +  0) = ms4B;
+      *(UInt*)(m +  4) = w2;
+      *(UInt*)(m +  8) = w1;
+      *(UInt*)(m + 12) = ls4B;
+   }
+   nonptr_or_unknown_range(m, 16);
+}
+
+// This handles 64 bit stores on 32 bit targets.  The
+// store data is passed in 2 pieces, the most significant
+// bits first.
+static VG_REGPARM(3)
+void check_store8_ms4B_ls4B(Addr m, Seg* mptr_vseg,
+                            UWord ms4B, UWord ls4B)
+{
+   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
+   // Actually *do* the STORE here
+   if (host_is_little_endian()) {
+      // FIXME: aren't we really concerned whether the guest
+      // is little endian, not whether the host is?
+      *(UInt*)(m + 0) = ls4B;
+      *(UInt*)(m + 4) = ms4B;
+   } else {
+      *(UInt*)(m + 0) = ms4B;
+      *(UInt*)(m + 4) = ls4B;
+   }
+   nonptr_or_unknown_range(m, 8);
+}
+
+// This handles 64 bit non pointer stores on 64 bit targets.
+// It must not be called on 32 bit targets.
+static VG_REGPARM(3)
+void check_store8_all8B(Addr m, Seg* mptr_vseg, UWord all8B)
+{
+   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
+   // Actually *do* the STORE here
+   *(ULong*)m = all8B;
+   nonptr_or_unknown_range(m, 8);
+}
+
+// This handles 64 bit stores on 64 bit targets.  It must
+// not be called on 32 bit targets.  A "_P" handler: 't' is the
+// pointer value being stored and 't_vseg' its shadow, which is
+// propagated into the shadow memory when the store is aligned.
+static VG_REGPARM(3)
+void check_store8_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
+{
+   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(t_vseg);
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
+   // Actually *do* the STORE here
+   *(ULong*)m = t;
+   if (VG_IS_8_ALIGNED(m)) {
+      set_mem_vseg( m, t_vseg );
+   } else {
+      // straddling two words
+      nonptr_or_unknown_range(m, 8);
+   }
+}
+
+// This handles 32 bit stores on 32 bit targets.  It must
+// not be called on 64 bit targets.  A "_P" handler: 't' is the
+// pointer value being stored and 't_vseg' its shadow, which is
+// propagated into the shadow memory when the store is aligned.
+static VG_REGPARM(3)
+void check_store4_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
+{
+   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
+#  if SC_SEGS
+   checkSeg(t_vseg);
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
+   // Actually *do* the STORE here
+   *(UInt*)m = t;
+   if (VG_IS_4_ALIGNED(m)) {
+      set_mem_vseg( m, t_vseg );
+   } else {
+      // straddling two words
+      nonptr_or_unknown_range(m, 4);
+   }
+}
+
+// Used for both 32 bit and 64 bit targets.  Non-pointer store:
+// the affected shadow words are repaired afterwards.
+static VG_REGPARM(3)
+void check_store4(Addr m, Seg* mptr_vseg, UWord t)
+{
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
+   // Actually *do* the STORE here  (Nb: cast must be to 4-byte type!)
+   *(UInt*)m = t;
+   nonptr_or_unknown_range(m, 4);
+}
+
+// Used for both 32 bit and 64 bit targets.
+static VG_REGPARM(3)
+void check_store2(Addr m, Seg* mptr_vseg, UWord t)
+{
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 2, mptr_vseg);
+   // Actually *do* the STORE here  (Nb: cast must be to 2-byte type!)
+   *(UShort*)m = t;
+   nonptr_or_unknown_range(m, 2);
+}
+
+// Used for both 32 bit and 64 bit targets.
+static VG_REGPARM(3)
+void check_store1(Addr m, Seg* mptr_vseg, UWord t)
+{
+#  if SC_SEGS
+   checkSeg(mptr_vseg);
+#  endif
+   check_load_or_store(/*is_write*/True, m, 1, mptr_vseg);
+   // Actually *do* the STORE here  (Nb: cast must be to 1-byte type!)
+   *(UChar*)m = t;
+   nonptr_or_unknown_range(m, 1);
+}
+
+
+// 3x3 dispatcher over the shadow-value kinds of the two arguments
+// 'seg1' and 'seg2' (assumed in scope at the expansion site): the
+// rows/columns are NONPTR (n), UNKNOWN (u) and known-pointer (p),
+// with BOTTOM (bt) short-circuiting both.  Argument order is:
+// bt, then nn nu np / un uu up / pn pu pp.
+// Nb: if the result is BOTTOM, return immediately -- don't let BOTTOM
+//     be changed to NONPTR by a range check on the result.
+#define BINOP(bt, nn, nu, np, un, uu, up, pn, pu, pp) \
+   if (BOTTOM == seg1 || BOTTOM == seg2) { bt;                   \
+   } else if (NONPTR == seg1)  { if      (NONPTR == seg2)  { nn; }  \
+                                 else if (UNKNOWN == seg2) { nu; }    \
+                                 else                      { np; }    \
+   } else if (UNKNOWN == seg1) { if      (NONPTR == seg2)  { un; }    \
+                                 else if (UNKNOWN == seg2) { uu; }    \
+                                 else                      { up; }    \
+   } else                      { if      (NONPTR == seg2)  { pn; }    \
+                                 else if (UNKNOWN == seg2) { pu; }    \
+                                 else                      { pp; }    \
+   }
+
+// Report a two-known-segments arithmetic error and force the result
+// to NONPTR.  Nb: expands to two statements; only safe in positions
+// (like BINOP's braced arms) where that is acceptable.
+#define BINERROR(opname)                    \
+   h_record_arith_error(seg1, seg2, opname);  \
+   out = NONPTR
+
+
+// -------------
+//  + | n  ?  p
+// -------------
+//  n | n  ?  p
+//  ? | ?  ?  ?
+//  p | p  ?  e   (all results become n if they look like a non-pointer)
+// -------------
+// Compute the shadow value of seg1 + seg2, given the concrete result
+// of the addition; 'opname' is used only for error reporting.
+static Seg* do_addW_result(Seg* seg1, Seg* seg2, UWord result, HChar* opname)
+{
+   Seg* out;
+#  if SC_SEGS
+   checkSeg(seg1);
+   checkSeg(seg2);
+#  endif
+   BINOP(
+      return BOTTOM,
+      out = NONPTR,  out = UNKNOWN, out = seg2,
+      out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
+      out = seg1,    out = UNKNOWN,       BINERROR(opname)
+   );
+   // final range check: a result that doesn't look like a pointer
+   // cannot carry a segment
+   return ( looks_like_a_pointer(result) ? out : NONPTR );
+}
+
+// Helper called from generated code for Add32/Add64; thin wrapper
+// around do_addW_result with a fixed opname.
+static VG_REGPARM(3) Seg* do_addW(Seg* seg1, Seg* seg2, UWord result)
+{
+   Seg* out;
+#  if SC_SEGS
+   checkSeg(seg1);
+   checkSeg(seg2);
+#  endif
+   out = do_addW_result(seg1, seg2, result, "Add32/Add64");
+#  if SC_SEGS
+   checkSeg(out);
+#  endif
+   return out;
+}
+
+// -------------
+//  - | n  ?  p      (Nb: operation is seg1 - seg2)
+// -------------
+//  n | n  ?  n+     (+) happens a lot due to "cmp", but result should never
+//  ? | ?  ?  n/B        be used, so give 'n'
+//  p | p  p? n*/B   (*) and possibly link the segments
+// -------------
+// Helper called from generated code for Sub32/Sub64.
+static VG_REGPARM(3) Seg* do_subW(Seg* seg1, Seg* seg2, UWord result)
+{
+   Seg* out;
+#  if SC_SEGS
+   checkSeg(seg1);
+   checkSeg(seg2);
+#  endif
+   // Nb: when returning BOTTOM, don't let it go through the range-check;
+   //     a segment linking offset can easily look like a nonptr.
+   BINOP(
+      return BOTTOM,
+      out = NONPTR,  out = UNKNOWN,    out = NONPTR,
+      out = UNKNOWN, out = UNKNOWN,    return BOTTOM,
+      out = seg1,    out = seg1/*??*/, return BOTTOM
+   );
+   // disabled segment-linking implementation, kept for reference
+   #if 0
+         // This is for the p-p segment-linking case
+         Seg end2 = seg2;
+         while (end2->links != seg2) end2 = end2->links;
+         end2->links = seg1->links;
+         seg1->links = seg2;
+         return NONPTR;
+   #endif
+   return ( looks_like_a_pointer(result) ? out : NONPTR );
+}
+
+// -------------
+//  & | n  ?  p
+// -------------
+//  n | n  ?  p
+//  ? | ?  ?  ?
+//  p | p  ?  *  (*) if p1==p2 then p else e (see comment)
+// -------------
+/* Seems to be OK to And two pointers:
+     testq %ptr1,%ptr2
+     jnz ..
+   which possibly derives from
+     if (ptr1 & ptr2) { A } else { B }
+   not sure what that means
+*/
+// Helper called from generated code for And32/And64.  'args_diff' is
+// the difference of the original (concrete) argument values, used to
+// detect the p1==p2 case.
+// NOTE(review): unlike do_addW/do_subW there are no SC_SEGS checkSeg
+// calls here -- confirm this omission is intentional.
+static VG_REGPARM(3) Seg* do_andW(Seg* seg1, Seg* seg2, 
+                                  UWord result, UWord args_diff)
+{
+   Seg* out;
+   if (0 == args_diff) {
+      // p1==p2
+      out = seg1;
+   } else {
+      BINOP(
+         return BOTTOM,
+         out = NONPTR,  out = UNKNOWN, out = seg2,
+         out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
+         out = seg1,    out = UNKNOWN, out = NONPTR
+                                       /*BINERROR("And32/And64")*/
+      );
+   }
+   out = ( looks_like_a_pointer(result) ? out : NONPTR );
+   return out;
+}
+
+// -------------
+// `|`| n  ?  p
+// -------------
+//  n | n  ?  p
+//  ? | ?  ?  ?
+//  p | p  ?  n
+// -------------
+/* It's OK to Or two pointers together, but the result definitely
+   isn't a pointer.  Why would you want to do that?  Because of this:
+     char* p1 = malloc(..);
+     char* p2 = malloc(..);
+     ...
+     if (p1 || p2) { .. }
+   In this case gcc on x86/amd64 quite literally or-s the two pointers
+   together and throws away the result, the purpose of which is merely
+   to set %eflags.Z/%rflags.Z.  So we have to allow it.
+*/
+// Helper called from generated code for Or32/Or64.
+static VG_REGPARM(3) Seg* do_orW(Seg* seg1, Seg* seg2, UWord result)
+{
+   Seg* out;
+   BINOP(
+      return BOTTOM,
+      out = NONPTR,  out = UNKNOWN, out = seg2,
+      out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
+      out = seg1,    out = UNKNOWN, out = NONPTR
+   );
+   out = ( looks_like_a_pointer(result) ? out : NONPTR );
+   return out;
+}
+
+// -------------
+//  ~ | n  ?  p
+// -------------
+//    | n  n  n
+// -------------
+// Helper called from generated code for Not: the complement of any
+// non-BOTTOM value is treated as a non-pointer.  'result' is unused.
+static VG_REGPARM(2) Seg* do_notW(Seg* seg1, UWord result)
+{
+#  if SC_SEGS
+   checkSeg(seg1);
+#  endif
+   if (BOTTOM == seg1) return BOTTOM;
+   return NONPTR;
+}
+
+// Pointers are rarely multiplied, but sometimes legitimately, eg. as hash
+// function inputs.  But two pointers args --> error.
+// Pretend it always returns a nonptr.  Maybe improve later.
+// Helper called from generated code for Mul32/Mul64; only the two
+// shadow args are needed, not the concrete result.
+static VG_REGPARM(2) Seg* do_mulW(Seg* seg1, Seg* seg2)
+{
+#  if SC_SEGS
+   checkSeg(seg1);
+   checkSeg(seg2);
+#  endif
+   if (is_known_segment(seg1) && is_known_segment(seg2))
+      h_record_arith_error(seg1, seg2, "Mul32/Mul64");
+   return NONPTR;
+}
+
+ 
+/*--------------------------------------------------------------------*/
+/*--- Instrumentation                                              ---*/
+/*--------------------------------------------------------------------*/
+
+/* The h_ instrumenter that follows is complex, since it deals with
+   shadow value computation.
+
+   It also needs to generate instrumentation for the sg_ side of
+   things.  That's relatively straightforward.  However, rather than
+   confuse the code herein any further, we simply delegate the problem
+   to sg_main.c, by using the four functions
+   sg_instrument_{init,fini,IRStmt,final_jump}.  These four completely
+   abstractify the sg_ instrumentation.  See comments in sg_main.c's
+   instrumentation section for further details. */
+
+/* Carries around state during Ptrcheck instrumentation. */
+typedef
+   struct {
+      /* MODIFIED: the superblock being constructed.  IRStmts are
+         added. */
+      IRSB* bb;
+      /* READONLY: print each statement as it is added?  (see stmt()) */
+      Bool  trace;
+
+      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
+         original temps to their current shadow temp.
+         Initially all entries are IRTemp_INVALID.  Entries are added
+         lazily since many original temps are not used due to
+         optimisation prior to instrumentation.  Note that only
+         integer temps of the guest word size are shadowed, since it
+         is impossible (or meaningless) to hold a pointer in any other
+         type of temp. */
+      IRTemp* tmpMap;
+      Int     n_originalTmps; /* for range checking */
+
+      /* READONLY: the host word type.  Needed for constructing
+         arguments of type 'HWord' to be passed to helper functions.
+         Ity_I32 or Ity_I64 only. */
+      IRType hWordTy;
+
+      /* READONLY: the guest word type, Ity_I32 or Ity_I64 only. */
+      IRType gWordTy;
+
+      /* READONLY: the guest state size, so we can generate shadow
+         offsets correctly. */
+      Int guest_state_sizeB;
+   }
+   PCEnv;
+
+/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
+   demand), as they are encountered.  This is for two reasons.
+
+   (1) (less important reason): Many original tmps are unused due to
+   initial IR optimisation, and we do not want to waste space in
+   tables tracking them.
+
+   Shadow IRTemps are therefore allocated on demand.  pce.tmpMap is a
+   table indexed [0 .. n_originalTmps-1], which gives the current
+   shadow for each original tmp, or IRTemp_INVALID if none is so far
+   assigned.
+   It is necessary to support making multiple assignments to a shadow
+   -- specifically, after testing a shadow for definedness, it needs
+   to be made defined.  But IR's SSA property disallows this.
+
+   (2) (more important reason): Therefore, when a shadow needs to get
+   a new value, a new temporary is created, the value is assigned to
+   that, and the tmpMap is updated to reflect the new binding.
+
+   A corollary is that if the tmpMap maps a given tmp to
+   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
+   there's a read-before-write error in the original tmps.  The IR
+   sanity checker should catch all such anomalies, however.
+*/
+
+/* Find the tmp currently shadowing the given original tmp.  If none
+   so far exists, allocate one.  Note: the tl_assert(0) below makes
+   the lazy-allocation branch unreachable in practice -- a shadow is
+   expected to have been created beforehand via newShadowTmp. */
+static IRTemp findShadowTmp ( PCEnv* pce, IRTemp orig )
+{
+   tl_assert(orig < pce->n_originalTmps);
+   tl_assert(pce->bb->tyenv->types[orig] == pce->gWordTy);
+   if (pce->tmpMap[orig] == IRTemp_INVALID) {
+      tl_assert(0);
+      pce->tmpMap[orig]
+         = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   }
+   return pce->tmpMap[orig];
+}
+
+/* Allocate a new shadow for the given original tmp.  This means any
+   previous shadow is abandoned.  This is needed because it is
+   necessary to give a new value to a shadow once it has been tested
+   for undefinedness, but unfortunately IR's SSA property disallows
+   this.  Instead we must abandon the old shadow, allocate a new one
+   and use that instead. */
+__attribute__((noinline))
+static IRTemp newShadowTmp ( PCEnv* pce, IRTemp orig )
+{
+   tl_assert(orig < pce->n_originalTmps);
+   tl_assert(pce->bb->tyenv->types[orig] == pce->gWordTy);
+   pce->tmpMap[orig]
+      = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   return pce->tmpMap[orig];
+}
+
+
+/*------------------------------------------------------------*/
+/*--- IRAtoms -- a subset of IRExprs                       ---*/
+/*------------------------------------------------------------*/
+
+/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
+   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
+   input, most of this code deals in atoms.  Usefully, a value atom
+   always has a V-value which is also an atom: constants are shadowed
+   by constants, and temps are shadowed by the corresponding shadow
+   temporary. */
+
+typedef  IRExpr  IRAtom;
+
+//zz /* (used for sanity checks only): is this an atom which looks
+//zz    like it's from original code? */
+//zz static Bool isOriginalAtom ( PCEnv* pce, IRAtom* a1 )
+//zz {
+//zz    if (a1->tag == Iex_Const)
+//zz       return True;
+//zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < pce->n_originalTmps)
+//zz       return True;
+//zz    return False;
+//zz }
+//zz 
+//zz /* (used for sanity checks only): is this an atom which looks
+//zz    like it's from shadow code? */
+//zz static Bool isShadowAtom ( PCEnv* pce, IRAtom* a1 )
+//zz {
+//zz    if (a1->tag == Iex_Const)
+//zz       return True;
+//zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= pce->n_originalTmps)
+//zz       return True;
+//zz    return False;
+//zz }
+//zz 
+//zz /* (used for sanity checks only): check that both args are atoms and
+//zz    are identically-kinded. */
+//zz static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
+//zz {
+//zz    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
+//zz       return True;
+//zz    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
+//zz       return True;
+//zz    return False;
+//zz }
+
+
+/*------------------------------------------------------------*/
+/*--- Constructing IR fragments                            ---*/
+/*------------------------------------------------------------*/
+
+/* add stmt to a bb, optionally tracing it.  'cat' is a one-character
+   tag identifying the origin of the statement (eg 'I' for
+   instrumentation), shown in trace output. */
+static inline void stmt ( HChar cat, PCEnv* pce, IRStmt* st ) {
+   if (pce->trace) {
+      VG_(printf)("  %c: ", cat);
+      ppIRStmt(st);
+      VG_(printf)("\n");
+   }
+   addStmtToIRSB(pce->bb, st);
+}
+
+/* assign value to tmp */
+static inline
+void assign ( HChar cat, PCEnv* pce, IRTemp tmp, IRExpr* expr ) {
+   stmt(cat, pce, IRStmt_WrTmp(tmp,expr));
+}
+
+/* build various kinds of expressions */
+#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
+#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
+#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
+#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
+#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
+#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
+#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
+#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
+
+/* Bind the given expression to a new temporary, and return the
+   temporary.  This effectively converts an arbitrary expression into
+   an atom.
+
+   'ty' is the type of 'e' and hence the type that the new temporary
+   needs to be.  But passing it is redundant, since we can deduce the
+   type merely by inspecting 'e'.  So at least use that fact to assert
+   that the two types agree. */
+static IRAtom* assignNew ( HChar cat, PCEnv* pce, IRType ty, IRExpr* e ) {
+   IRTemp t;
+   IRType tyE = typeOfIRExpr(pce->bb->tyenv, e);
+   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
+   t = newIRTemp(pce->bb->tyenv, ty);
+   assign(cat, pce, t, e);
+   return mkexpr(t);
+}
+
+
+
+//-----------------------------------------------------------------------
+// Approach taken for range-checking for NONPTR/UNKNOWN-ness as follows.
+//
+// Range check (NONPTR/seg): 
+// - after modifying a word-sized value in/into a TempReg:
+//    - {ADD, SUB, ADC, SBB, AND, OR, XOR, LEA, LEA2, NEG, NOT}L
+//    - BSWAP
+// 
+// Range check (NONPTR/UNKNOWN):
+// - when introducing a new word-sized value into a TempReg:
+//    - MOVL l, t2
+//
+// - when copying a word-sized value which lacks a corresponding segment
+//   into a TempReg:
+//    - straddled LDL
+//
+// - when a sub-word of a word (or two) is updated:
+//    - SHROTL
+//    - {ADD, SUB, ADC, SBB, AND, OR, XOR, SHROT, NEG, NOT}[WB]
+//    - PUT[WB]
+//    - straddled   STL (2 range checks)
+//    - straddled   STW (2 range checks)
+//    - unstraddled STW
+//    - STB
+//    
+// Just copy:
+// - when copying word-sized values:
+//    - MOVL t1, t2 (--optimise=no only)
+//    - CMOV
+//    - GETL, PUTL
+//    - unstraddled LDL, unstraddled STL
+//
+// - when barely changing
+//    - INC[LWB]/DEC[LWB]
+// 
+// Set to NONPTR:
+// - after copying a sub-word value into a TempReg:
+//    - MOV[WB] l, t2
+//    - GET[WB]
+//    - unstraddled LDW
+//    - straddled   LDW
+//    - LDB
+//    - POP[WB]
+//
+// - after copying an obvious non-ptr into a TempReg:
+//    - GETF
+//    - CC2VAL
+//    - POPL
+//
+// - after copying an obvious non-ptr into a memory word:
+//    - FPU_W
+// 
+// Do nothing:
+// - LOCK, INCEIP
+// - WIDEN[WB]
+// - JMP, JIFZ
+// - CALLM_[SE], PUSHL, CALLM, CLEAR
+// - FPU, FPU_R (and similar MMX/SSE ones)
+//
+
+
+
+
+/* Call h_fn (name h_nm) with the given arg, and return a new IRTemp
+   holding the result.  The arg must be a word-typed atom.  Callee
+   must be a VG_REGPARM(1) function. */
+__attribute__((noinline))
+static IRTemp gen_dirty_W_W ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                              IRExpr* a1 )
+{
+   IRTemp   res;
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   res = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   di = unsafeIRDirty_1_N( res, 1/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_1( a1 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+   return res;
+}
+
+/* Two-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(2)
+   function.*/
+static IRTemp gen_dirty_W_WW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                               IRExpr* a1, IRExpr* a2 )
+{
+   IRTemp   res;
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   res = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   di = unsafeIRDirty_1_N( res, 2/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_2( a1, a2 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+   return res;
+}
+
+/* Three-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
+   function.*/
+static IRTemp gen_dirty_W_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                                IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+   IRTemp   res;
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(isIRAtom(a3));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a3) == pce->gWordTy);
+   res = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   di = unsafeIRDirty_1_N( res, 3/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_3( a1, a2, a3 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+   return res;
+}
+
+/* Four-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
+   function -- note, only 3 regparms even though there are 4 args; the
+   remainder are passed some other way. */
+static IRTemp gen_dirty_W_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                                 IRExpr* a1, IRExpr* a2,
+                                 IRExpr* a3, IRExpr* a4 )
+{
+   IRTemp   res;
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(isIRAtom(a3));
+   tl_assert(isIRAtom(a4));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a3) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a4) == pce->gWordTy);
+   res = newIRTemp(pce->bb->tyenv, pce->gWordTy);
+   di = unsafeIRDirty_1_N( res, 3/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_4( a1, a2, a3, a4 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+   return res;
+}
+
+/* Version of gen_dirty_W_WW with no return value.  Callee must be a
+   VG_REGPARM(2) function.*/
+static void gen_dirty_v_WW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                             IRExpr* a1, IRExpr* a2 )
+{
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   di = unsafeIRDirty_0_N( 2/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_2( a1, a2 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+}
+
+/* Version of gen_dirty_W_WWW with no return value.  Callee must be a
+   VG_REGPARM(3) function.*/
+static void gen_dirty_v_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                              IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(isIRAtom(a3));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a3) == pce->gWordTy);
+   di = unsafeIRDirty_0_N( 3/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_3( a1, a2, a3 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+}
+
+/* Version of gen_dirty_v_WWW for 4 arguments.  Callee must be a
+   VG_REGPARM(3) function -- regparms are capped at 3 regardless of
+   argument count. */
+static void gen_dirty_v_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                               IRExpr* a1, IRExpr* a2,
+                               IRExpr* a3, IRExpr* a4 )
+{
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(isIRAtom(a3));
+   tl_assert(isIRAtom(a4));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a3) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a4) == pce->gWordTy);
+   di = unsafeIRDirty_0_N( 3/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_4( a1, a2, a3, a4 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+}
+
+/* Version of gen_dirty_v_WWW for 6 arguments.  Callee must be a
+   VG_REGPARM(3) function -- regparms are capped at 3 regardless of
+   argument count.  (Used eg for check_store16_ms4B_4B_4B_ls4B.) */
+static void gen_dirty_v_6W ( PCEnv* pce, void* h_fn, HChar* h_nm, 
+                             IRExpr* a1, IRExpr* a2, IRExpr* a3,
+                             IRExpr* a4, IRExpr* a5, IRExpr* a6 )
+{
+   IRDirty* di;
+   tl_assert(isIRAtom(a1));
+   tl_assert(isIRAtom(a2));
+   tl_assert(isIRAtom(a3));
+   tl_assert(isIRAtom(a4));
+   tl_assert(isIRAtom(a5));
+   tl_assert(isIRAtom(a6));
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a1) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a2) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a3) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a4) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a5) == pce->gWordTy);
+   tl_assert(typeOfIRExpr(pce->bb->tyenv, a6) == pce->gWordTy);
+   di = unsafeIRDirty_0_N( 3/*regparms*/,
+                           h_nm, VG_(fnptr_to_fnentry)( h_fn ),
+                           mkIRExprVec_6( a1, a2, a3, a4, a5, a6 ) );
+   stmt( 'I', pce, IRStmt_Dirty(di) );
+}
+
+/* Zero-extend the narrow atom 'a' (I8/I16, plus I32 on 64-bit hosts)
+   to the host word type, binding the widened value to a new temp and
+   returning it as an atom.  Asserts on any other input type. */
+static IRAtom* uwiden_to_host_word ( PCEnv* pce, IRAtom* a )
+{
+   IRType a_ty = typeOfIRExpr(pce->bb->tyenv, a);
+   tl_assert(isIRAtom(a));
+   if (pce->hWordTy == Ity_I32) {
+      switch (a_ty) {
+         case Ity_I8:
+            return assignNew( 'I', pce, Ity_I32, unop(Iop_8Uto32, a) );
+         case Ity_I16:
+            return assignNew( 'I', pce, Ity_I32, unop(Iop_16Uto32, a) );
+         default:
+            ppIRType(a_ty);
+            tl_assert(0);
+      }
+   } else {
+      tl_assert(pce->hWordTy == Ity_I64);
+      switch (a_ty) {
+         case Ity_I8:
+            return assignNew( 'I', pce, Ity_I64, unop(Iop_8Uto64, a) );
+         case Ity_I16:
+            return assignNew( 'I', pce, Ity_I64, unop(Iop_16Uto64, a) );
+         case Ity_I32:
+            return assignNew( 'I', pce, Ity_I64, unop(Iop_32Uto64, a) );
+         default:
+            ppIRType(a_ty);
+            tl_assert(0);
+      }
+   }
+}
+
+/* 'e' is a word-sized atom.  Call nonptr_or_unknown with it, bind the
+   results to a new temporary, and return the temporary.  Note this
+   takes an original expression but returns a shadow value. */
+static IRTemp gen_call_nonptr_or_unknown_w ( PCEnv* pce, IRExpr* e )
+{
+   return gen_dirty_W_W( pce, &nonptr_or_unknown, 
+                              "nonptr_or_unknown", e );
+}
+
+
+/* Generate the shadow value for an IRExpr which is an atom and
+   guaranteed to be word-sized.  Constants get a shadow computed at
+   run time by nonptr_or_unknown; temps map to their current shadow
+   temp.  Asserts on any other (hence non-word-sized) atom. */
+static IRAtom* schemeEw_Atom ( PCEnv* pce, IRExpr* e )
+{
+   if (pce->gWordTy == Ity_I32) {
+      if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U32) {
+         IRTemp t;
+         tl_assert(sizeof(UWord) == 4);
+         t = gen_call_nonptr_or_unknown_w(pce, e);
+         return mkexpr(t);
+      }
+      if (e->tag == Iex_RdTmp
+          && typeOfIRExpr(pce->bb->tyenv, e) == Ity_I32) {
+         return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
+      }
+      /* there are no other word-sized atom cases */
+   } else {
+      if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U64) {
+         IRTemp t;
+         tl_assert(sizeof(UWord) == 8);
+         //return mkU64( (ULong)(UWord)NONPTR );
+         t = gen_call_nonptr_or_unknown_w(pce, e);
+         return mkexpr(t);
+      }
+      if (e->tag == Iex_RdTmp
+          && typeOfIRExpr(pce->bb->tyenv, e) == Ity_I64) {
+         return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
+      }
+      /* there are no other word-sized atom cases */
+   }
+   ppIRExpr(e);
+   tl_assert(0);
+}
+
+
+/* Generate instrumentation for an arithmetic op whose result is
+   guest-word sized.  'dst' already holds the original result; this
+   routine generates exactly one assignment of the matching shadow
+   (Seg) value to 'dstv'.  a1..a4 are the op's original arguments,
+   guaranteed to be atoms; arguments the op doesn't have are NULL
+   (a3/a4 are currently never consulted).  Ops fall into four
+   classes, mirrored in both the 32- and 64-bit switches below:
+     - add/sub/or/and/not/mul: a run-time helper combines the shadow
+       args (and, where relevant, the result value);
+     - ops whose result might or might not be a pointer (xor,
+       64<->32 / 128<->64 splits): classify the result at run time
+       with nonptr_or_unknown;
+     - ops whose result clearly cannot be a pointer (shifts,
+       widenings, compares, simd, ...): shadow is the constant NONPTR;
+     - anything else: unhandled, print the op and assert. */
+static
+void instrument_arithop ( PCEnv* pce,
+                          IRTemp dst, /* already holds result */
+                          IRTemp dstv, /* generate an assignment to this */
+                          IROp op,
+                          /* original args, guaranteed to be atoms */
+                          IRExpr* a1, IRExpr* a2, IRExpr* a3, IRExpr* a4 )
+{
+   HChar*  nm  = NULL;
+   void*   fn  = NULL;
+   IRExpr* a1v = NULL;
+   IRExpr* a2v = NULL;
+   //IRExpr* a3v = NULL;
+   //IRExpr* a4v = NULL;
+   IRTemp  res = IRTemp_INVALID;
+
+   if (pce->gWordTy == Ity_I32) {
+
+      tl_assert(pce->hWordTy == Ity_I32);
+      switch (op) {
+
+         /* For these cases, pass Segs for both arguments, and the
+            result value. */
+         case Iop_Add32: nm = "do_addW"; fn = &do_addW; goto ssr32;
+         case Iop_Sub32: nm = "do_subW"; fn = &do_subW; goto ssr32;
+         case Iop_Or32:  nm = "do_orW";  fn = &do_orW;  goto ssr32;
+         ssr32:
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* In this case, pass Segs for both arguments, the result
+            value, and the difference between the (original) values of
+            the arguments. */
+         case Iop_And32:
+            nm = "do_andW"; fn = &do_andW;
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WWWW( 
+                     pce, fn, nm, a1v, a2v, mkexpr(dst),
+                     assignNew( 'I', pce, Ity_I32,
+                                binop(Iop_Sub32,a1,a2) ) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* Pass one shadow arg and the result to the helper. */
+         case Iop_Not32: nm = "do_notW"; fn = &do_notW; goto vr32;
+         vr32:
+            a1v = schemeEw_Atom( pce, a1 );
+            res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* Pass two shadow args only to the helper. */
+         case Iop_Mul32: nm = "do_mulW"; fn = &do_mulW; goto vv32;
+         vv32:
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* We don't really know what the result could be; test at run
+            time. */
+         case Iop_64HIto32: goto n_or_u_32;
+         case Iop_64to32:   goto n_or_u_32;
+         case Iop_Xor32:    goto n_or_u_32;
+         n_or_u_32:
+            assign( 'I', pce, dstv,
+                    mkexpr(
+                       gen_call_nonptr_or_unknown_w( pce, 
+                                                     mkexpr(dst) ) ) );
+            break;
+
+         /* Cases where it's very obvious that the result cannot be a
+            pointer.  Hence declare directly that it's NONPTR; don't
+            bother with the overhead of calling nonptr_or_unknown. */
+
+         /* cases where it makes no sense for the result to be a ptr */
+         /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
+            arg, so that shift by zero preserves the original
+            value. */
+         case Iop_Shl32:    goto n32;
+         case Iop_Sar32:    goto n32;
+         case Iop_Shr32:    goto n32;
+         case Iop_16Uto32:  goto n32;
+         case Iop_16Sto32:  goto n32;
+         case Iop_F64toI32: goto n32;
+         case Iop_16HLto32: goto n32;
+         case Iop_MullS16:  goto n32;
+         case Iop_MullU16:  goto n32;
+         case Iop_PRemC3210F64: goto n32;
+         case Iop_DivU32:   goto n32;
+         case Iop_DivS32:   goto n32;
+         case Iop_V128to32: goto n32;
+
+         /* cases where result range is very limited and clearly cannot
+            be a pointer */
+         case Iop_1Uto32: goto n32;
+         case Iop_1Sto32: goto n32;
+         case Iop_8Uto32: goto n32;
+         case Iop_8Sto32: goto n32;
+         case Iop_Clz32:  goto n32;
+         case Iop_Ctz32:  goto n32;
+         case Iop_CmpF64: goto n32;
+         case Iop_CmpORD32S: goto n32;
+         case Iop_CmpORD32U: goto n32;
+         n32:
+            assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
+            break;
+
+         default:
+            VG_(printf)("instrument_arithop(32-bit): unhandled: ");
+            ppIROp(op);
+            tl_assert(0);
+      }
+
+   } else {
+
+      /* 64-bit guest word: same scheme as above, with 64-bit ops
+         and helpers taking 64-bit words. */
+      tl_assert(pce->gWordTy == Ity_I64);
+      switch (op) {
+
+         /* For these cases, pass Segs for both arguments, and the
+            result value. */
+         case Iop_Add64: nm = "do_addW"; fn = &do_addW; goto ssr64;
+         case Iop_Sub64: nm = "do_subW"; fn = &do_subW; goto ssr64;
+         case Iop_Or64:  nm = "do_orW";  fn = &do_orW;  goto ssr64;
+         ssr64:
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* In this case, pass Segs for both arguments, the result
+            value, and the difference between the (original) values of
+            the arguments. */
+         case Iop_And64:
+            nm = "do_andW"; fn = &do_andW;
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WWWW( 
+                     pce, fn, nm, a1v, a2v, mkexpr(dst),
+                     assignNew( 'I', pce, Ity_I64,
+                                binop(Iop_Sub64,a1,a2) ) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* Pass one shadow arg and the result to the helper. */
+         case Iop_Not64: nm = "do_notW"; fn = &do_notW; goto vr64;
+         vr64:
+            a1v = schemeEw_Atom( pce, a1 );
+            res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* Pass two shadow args only to the helper. */
+         case Iop_Mul64: nm = "do_mulW"; fn = &do_mulW; goto vv64;
+         vv64:
+            a1v = schemeEw_Atom( pce, a1 );
+            a2v = schemeEw_Atom( pce, a2 );
+            res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
+            assign( 'I', pce, dstv, mkexpr(res) );
+            break;
+
+         /* We don't really know what the result could be; test at run
+            time. */
+         case Iop_Xor64:      goto n_or_u_64;
+         case Iop_128HIto64:  goto n_or_u_64;
+         case Iop_128to64:    goto n_or_u_64;
+         case Iop_V128HIto64: goto n_or_u_64;
+         case Iop_V128to64:   goto n_or_u_64;
+         n_or_u_64:
+            assign( 'I', pce, dstv,
+                    mkexpr(
+                       gen_call_nonptr_or_unknown_w( pce, 
+                                                     mkexpr(dst) ) ) );
+            break;
+
+         /* Cases where it's very obvious that the result cannot be a
+            pointer.  Hence declare directly that it's NONPTR; don't
+            bother with the overhead of calling nonptr_or_unknown. */
+
+         /* cases where it makes no sense for the result to be a ptr */
+         /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
+            arg, so that shift by zero preserves the original
+            value. */
+         case Iop_Shl64:      goto n64;
+         case Iop_Sar64:      goto n64;
+         case Iop_Shr64:      goto n64;
+         case Iop_32Uto64:    goto n64;
+         case Iop_32Sto64:    goto n64;
+         case Iop_16Uto64:    goto n64;
+         case Iop_16Sto64:    goto n64;
+         case Iop_32HLto64:   goto n64;
+         case Iop_DivModU64to32: goto n64;
+         case Iop_DivModS64to32: goto n64;
+         case Iop_F64toI64:      goto n64;
+         case Iop_MullS32:    goto n64;
+         case Iop_MullU32:    goto n64;
+         case Iop_DivU64:     goto n64;
+         case Iop_DivS64:     goto n64;
+         case Iop_ReinterpF64asI64: goto n64;
+
+         /* cases where result range is very limited and clearly cannot
+            be a pointer */
+         case Iop_1Uto64:        goto n64;
+         case Iop_8Uto64:        goto n64;
+         case Iop_8Sto64:        goto n64;
+         case Iop_Ctz64:         goto n64;
+         case Iop_Clz64:         goto n64;
+         case Iop_CmpORD64S:     goto n64;
+         case Iop_CmpORD64U:     goto n64;
+         /* 64-bit simd */
+         case Iop_Avg8Ux8: case Iop_Avg16Ux4:
+         case Iop_Max16Sx4: case Iop_Max8Ux8: case Iop_Min16Sx4:
+         case Iop_Min8Ux8: case Iop_MulHi16Ux4:
+         case Iop_QNarrow32Sx2: case Iop_QNarrow16Sx4:
+         case Iop_QNarrow16Ux4: case Iop_Add8x8: case Iop_Add32x2:
+         case Iop_QAdd8Sx8: case Iop_QAdd16Sx4: case Iop_QAdd8Ux8:
+         case Iop_QAdd16Ux4: case Iop_Add16x4: case Iop_CmpEQ8x8:
+         case Iop_CmpEQ32x2: case Iop_CmpEQ16x4: case Iop_CmpGT8Sx8:
+         case Iop_CmpGT32Sx2: case Iop_CmpGT16Sx4: case Iop_MulHi16Sx4:
+         case Iop_Mul16x4: case Iop_ShlN32x2: case Iop_ShlN16x4:
+         case Iop_SarN32x2: case Iop_SarN16x4: case Iop_ShrN32x2:
+         case Iop_ShrN16x4: case Iop_Sub8x8: case Iop_Sub32x2:
+         case Iop_QSub8Sx8: case Iop_QSub16Sx4: case Iop_QSub8Ux8:
+         case Iop_QSub16Ux4: case Iop_Sub16x4: case Iop_InterleaveHI8x8:
+         case Iop_InterleaveHI32x2: case Iop_InterleaveHI16x4:
+         case Iop_InterleaveLO8x8: case Iop_InterleaveLO32x2:
+         case Iop_InterleaveLO16x4: case Iop_SarN8x8:
+         case Iop_Perm8x8: case Iop_ShlN8x8: case Iop_Mul32x2:
+         case Iop_CatEvenLanes16x4: case Iop_CatOddLanes16x4:
+         n64:
+            assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
+            break;
+
+         default:
+            VG_(printf)("instrument_arithop(64-bit): unhandled: ");
+            ppIROp(op);
+            tl_assert(0);
+      }
+   }
+}
+
+/* Generate a (void) dirty call applying the nonptr_or_unknown_range
+   helper to the address range described by 'addr' and 'len'. */
+static 
+void gen_call_nonptr_or_unknown_range ( PCEnv* pce,
+                                        IRAtom* addr, IRAtom* len )
+{
+   gen_dirty_v_WW( pce, &nonptr_or_unknown_range,
+                   "nonptr_or_unknown_range", addr, len );
+}
+
+/* iii describes zero or more non-exact integer register updates.  For
+   each offset listed, generate IR that fetches the containing guest
+   register, passes it through nonptr_or_unknown, and writes the
+   result into the shadow copy of that register (at offset +
+   guest_state_sizeB). */
+static void gen_nonptr_or_unknown_for_III( PCEnv* pce, IntRegInfo* iii )
+{
+   Int j;
+   tl_assert(iii && iii->n_offsets >= 0);
+   for (j = 0; j < iii->n_offsets; j++) {
+      Int     off    = iii->offsets[j];
+      IRAtom* orig   = assignNew( 'I', pce, pce->gWordTy,
+                                  IRExpr_Get( off, pce->gWordTy ));
+      IRTemp  shadow = gen_call_nonptr_or_unknown_w( pce, orig );
+      stmt( 'I', pce,
+            IRStmt_Put( off + pce->guest_state_sizeB, mkexpr(shadow) ));
+   }
+}
+
+/* Generate into 'ane', instrumentation for 'st'.  Also copy 'st'
+   itself into 'ane' (the caller does not do so).  This is somewhat
+   complex and relies heavily on the assumption that the incoming IR
+   is in flat form.
+
+   Generally speaking, the instrumentation is placed after the
+   original statement, so that results computed by the original can be
+   used in the instrumentation.  However, that isn't safe for memory
+   references, since we need the instrumentation (hence bounds check
+   and potential error message) to happen before the reference itself,
+   as the latter could cause a fault. */
+static void schemeS ( PCEnv* pce, IRStmt* st )
+{
+   tl_assert(st);
+   tl_assert(isFlatIRStmt(st));
+
+   switch (st->tag) {
+
+      case Ist_Dirty: {
+         Int i;
+         IRDirty* di;
+         stmt( 'C', pce, st );
+         /* nasty.  assumes that (1) all helpers are unconditional,
+            and (2) all outputs are non-ptr */
+         di = st->Ist.Dirty.details;
+         /* deal with the return tmp, if any */
+         if (di->tmp != IRTemp_INVALID
+             && typeOfIRTemp(pce->bb->tyenv, di->tmp) == pce->gWordTy) {
+            /* di->tmp is shadowed.  Set it to NONPTR. */
+            IRTemp dstv = newShadowTmp( pce, di->tmp );
+            if (pce->gWordTy == Ity_I32) {
+              assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
+            } else {
+              assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
+            }
+         }
+         /* apply the nonptr_or_unknown technique to any parts of
+            the guest state that happen to get written */
+         for (i = 0; i < di->nFxState; i++) {
+            IntRegInfo iii;
+            tl_assert(di->fxState[i].fx != Ifx_None);
+            if (di->fxState[i].fx == Ifx_Read)
+               continue; /* this bit is only read -- not interesting */
+            get_IntRegInfo( &iii, di->fxState[i].offset,
+                                  di->fxState[i].size );
+            tl_assert(iii.n_offsets >= -1 
+                      && iii.n_offsets <= N_INTREGINFO_OFFSETS);
+            /* Deal with 3 possible cases, same as with Ist_Put
+               elsewhere in this function. */
+            if (iii.n_offsets == -1) {
+               /* case (1): exact write of an integer register. */
+               IRAtom* a1
+                  = assignNew( 'I', pce, pce->gWordTy, 
+                               IRExpr_Get( iii.offsets[i], pce->gWordTy ));
+               IRTemp a2 = gen_call_nonptr_or_unknown_w( pce, a1 );
+               stmt( 'I', pce, IRStmt_Put( iii.offsets[i] 
+                                              + pce->guest_state_sizeB,
+                                           mkexpr(a2) ));
+            } else {
+               /* when == 0: case (3): no instrumentation needed */
+               /* when > 0: case (2) .. complex case.  Fish out the
+                  stored value for the whole register, heave it
+                  through nonptr_or_unknown, and use that as the new
+                  shadow value. */
+               tl_assert(iii.n_offsets >= 0 
+                         && iii.n_offsets <= N_INTREGINFO_OFFSETS);
+               gen_nonptr_or_unknown_for_III( pce, &iii );
+            }
+         } /* for (i = 0; i < di->nFxState; i++) */
+         /* finally, deal with memory outputs */
+         if (di->mFx != Ifx_None) {
+            tl_assert(di->mAddr && isIRAtom(di->mAddr));
+            tl_assert(di->mSize > 0);
+            gen_call_nonptr_or_unknown_range( pce, di->mAddr,
+                                              mkIRExpr_HWord(di->mSize));
+         }
+         break;
+      }
+
+      case Ist_NoOp:
+         break;
+
+      /* nothing interesting in these; just copy them through */
+      case Ist_AbiHint:
+      case Ist_MBE:
+      case Ist_Exit:
+      case Ist_IMark:
+         stmt( 'C', pce, st );
+         break;
+
+      case Ist_PutI: {
+         IRRegArray* descr = st->Ist.PutI.descr;
+         stmt( 'C', pce, st );
+         tl_assert(descr && descr->elemTy);
+         if (is_integer_guest_reg_array(descr)) {
+            /* if this fails, is_integer_guest_reg_array is returning
+               bogus results */
+            tl_assert(descr->elemTy == pce->gWordTy);
+            stmt(
+               'I', pce,
+               IRStmt_PutI(
+                  mkIRRegArray(descr->base + pce->guest_state_sizeB,
+                               descr->elemTy, descr->nElems),
+                  st->Ist.PutI.ix,
+                  st->Ist.PutI.bias,
+                  schemeEw_Atom( pce, st->Ist.PutI.data)
+               )
+            );
+         }
+         break;
+      }
+
+      case Ist_Put: {
+         /* PUT(offset) = atom */
+         /* 3 cases:
+            1. It's a complete write of an integer register.  Get hold of
+               'atom's shadow value and write it in the shadow state.
+            2. It's a partial write of an integer register.  Let the write
+               happen, then fish out the complete register value and see if,
+               via range checking, consultation of tea leaves, etc, its
+               shadow value can be upgraded to anything useful.
+            3. It is none of the above.  Generate no instrumentation. */
+         IntRegInfo iii;
+         IRType     ty;
+         stmt( 'C', pce, st );
+         ty = typeOfIRExpr(pce->bb->tyenv, st->Ist.Put.data);
+         get_IntRegInfo( &iii, st->Ist.Put.offset,
+                         sizeofIRType(ty) );
+         if (iii.n_offsets == -1) {
+            /* case (1): exact write of an integer register. */
+            tl_assert(ty == pce->gWordTy);
+            stmt( 'I', pce,
+                       IRStmt_Put( st->Ist.Put.offset
+                                      + pce->guest_state_sizeB,
+                                   schemeEw_Atom( pce, st->Ist.Put.data)) );
+         } else {
+            /* when == 0: case (3): no instrumentation needed */
+            /* when > 0: case (2) .. complex case.  Fish out the
+               stored value for the whole register, heave it through
+               nonptr_or_unknown, and use that as the new shadow
+               value. */
+            tl_assert(iii.n_offsets >= 0 
+                      && iii.n_offsets <= N_INTREGINFO_OFFSETS);
+            gen_nonptr_or_unknown_for_III( pce, &iii );
+         }
+         break;
+      } /* case Ist_Put */
+
+      case Ist_Store: {
+         /* We have: STle(addr) = data
+            if data is int-word sized, do
+            check_store4(addr, addr#, data, data#)
+            for all other stores
+            check_store{1,2}(addr, addr#, data)
+
+            The helper actually *does* the store, so that it can do
+            the post-hoc ugly hack of inspecting and "improving" the
+            shadow data after the store, in the case where it isn't an
+            aligned word store.
+         */
+         IRExpr* data  = st->Ist.Store.data;
+         IRExpr* addr  = st->Ist.Store.addr;
+         IRType  d_ty  = typeOfIRExpr(pce->bb->tyenv, data);
+         IRExpr* addrv = schemeEw_Atom( pce, addr );
+         if (pce->gWordTy == Ity_I32) {
+            /* ------ 32 bit host/guest (cough, cough) ------ */
+            switch (d_ty) {
+               /* Integer word case */
+               case Ity_I32: {
+                  IRExpr* datav = schemeEw_Atom( pce, data );
+                  gen_dirty_v_WWWW( pce,
+                                    &check_store4_P, "check_store4_P",
+                                    addr, addrv, data, datav );
+                  break;
+               }
+               /* Integer subword cases */
+               case Ity_I16:
+                  gen_dirty_v_WWW( pce,
+                                   &check_store2, "check_store2",
+                                   addr, addrv,
+                                   uwiden_to_host_word( pce, data ));
+                  break;
+               case Ity_I8:
+                  gen_dirty_v_WWW( pce,
+                                   &check_store1, "check_store1",
+                                   addr, addrv,
+                                   uwiden_to_host_word( pce, data ));
+                  break;
+               /* 64-bit float.  Pass store data in 2 32-bit pieces. */
+               case Ity_F64: {
+                  IRAtom* d64 = assignNew( 'I', pce, Ity_I64,
+                                           unop(Iop_ReinterpF64asI64, data) );
+                  IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64to32, d64) );
+                  IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64HIto32, d64) );
+                  gen_dirty_v_WWWW( pce,
+                                    &check_store8_ms4B_ls4B, 
+                                    "check_store8_ms4B_ls4B",
+                                    addr, addrv, dHi32, dLo32 );
+                  break;
+               }
+               /* 32-bit float.  We can just use _store4, but need
+                  to futz with the argument type. */
+               case Ity_F32: {
+                  IRAtom* i32 = assignNew( 'I', pce, Ity_I32, 
+                                           unop(Iop_ReinterpF32asI32,
+                                                data ) );
+                  gen_dirty_v_WWW( pce,
+                                   &check_store4,
+                                   "check_store4",
+                                   addr, addrv, i32 );
+                  break;
+               }
+               /* 64-bit int.  Pass store data in 2 32-bit pieces. */
+               case Ity_I64: {
+                  IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64to32, data) );
+                  IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64HIto32, data) );
+                  gen_dirty_v_WWWW( pce,
+                                    &check_store8_ms4B_ls4B, 
+                                    "check_store8_ms4B_ls4B",
+                                    addr, addrv, dHi32, dLo32 );
+                  break;
+               }
+
+               /* 128-bit vector.  Pass store data in 4 32-bit pieces.
+                  This is all very ugly and inefficient, but it is
+                  hard to better without considerably complicating the
+                  store-handling schemes. */
+               case Ity_V128: {
+                  IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
+                                             unop(Iop_V128HIto64, data) );
+                  IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
+                                             unop(Iop_V128to64, data) );
+                  IRAtom* w3    = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64HIto32, dHi64) );
+                  IRAtom* w2    = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64to32, dHi64) );
+                  IRAtom* w1    = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64HIto32, dLo64) );
+                  IRAtom* w0    = assignNew( 'I', pce, Ity_I32,
+                                             unop(Iop_64to32, dLo64) );
+                  gen_dirty_v_6W( pce,
+                                  &check_store16_ms4B_4B_4B_ls4B, 
+                                  "check_store16_ms4B_4B_4B_ls4B",
+                                  addr, addrv, w3, w2, w1, w0 );
+                  break;
+               }
+
+
+               default:
+                  ppIRType(d_ty); tl_assert(0);
+            }
+         } else {
+            /* ------ 64 bit host/guest (cough, cough) ------ */
+            switch (d_ty) {
+               /* Integer word case */
+               case Ity_I64: {
+                  IRExpr* datav = schemeEw_Atom( pce, data );
+                  gen_dirty_v_WWWW( pce,
+                                    &check_store8_P, "check_store8_P",
+                                    addr, addrv, data, datav );
+                  break;
+               }
+               /* Integer subword cases */
+               case Ity_I32:
+                  gen_dirty_v_WWW( pce,
+                                   &check_store4, "check_store4",
+                                   addr, addrv,
+                                   uwiden_to_host_word( pce, data ));
+                  break;
+               case Ity_I16:
+                  gen_dirty_v_WWW( pce,
+                                   &check_store2, "check_store2",
+                                   addr, addrv,
+                                   uwiden_to_host_word( pce, data ));
+                  break;
+               case Ity_I8:
+                  gen_dirty_v_WWW( pce,
+                                   &check_store1, "check_store1",
+                                   addr, addrv,
+                                   uwiden_to_host_word( pce, data ));
+                  break;
+               /* 128-bit vector.  Pass store data in 2 64-bit pieces. */
+               case Ity_V128: {
+                  IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
+                                             unop(Iop_V128HIto64, data) );
+                  IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
+                                             unop(Iop_V128to64, data) );
+                  gen_dirty_v_WWWW( pce,
+                                    &check_store16_ms8B_ls8B, 
+                                    "check_store16_ms8B_ls8B",
+                                    addr, addrv, dHi64, dLo64 );
+                  break;
+               }
+               /* 64-bit float. */
+               case Ity_F64: {
+                  IRAtom* dI = assignNew( 'I', pce, Ity_I64, 
+                                           unop(Iop_ReinterpF64asI64,
+                                                data ) );
+                  gen_dirty_v_WWW( pce,
+                                   &check_store8_all8B,
+                                   "check_store8_all8B",
+                                   addr, addrv, dI );
+                  break;
+               }
+               /* 32-bit float.  We can just use _store4, but need
+                  to futz with the argument type. */
+               case Ity_F32: {
+                  IRAtom* i32 = assignNew( 'I', pce, Ity_I32, 
+                                           unop(Iop_ReinterpF32asI32,
+                                                data ) );
+                  IRAtom* i64 = assignNew( 'I', pce, Ity_I64, 
+                                           unop(Iop_32Uto64,
+                                                i32 ) );
+                  gen_dirty_v_WWW( pce,
+                                   &check_store4,
+                                   "check_store4",
+                                   addr, addrv, i64 );
+                  break;
+               }
+               default:
+                  ppIRType(d_ty); tl_assert(0);
+            }
+         }
+         /* And don't copy the original, since the helper does the
+            store.  Ick. */
+         break;
+      } /* case Ist_Store */
+
+      case Ist_WrTmp: {
+         /* This is the only place we have to deal with the full
+            IRExpr range.  In all other places where an IRExpr could
+            appear, we in fact only get an atom (Iex_RdTmp or
+            Iex_Const). */
+         IRExpr* e      = st->Ist.WrTmp.data;
+         IRType  e_ty   = typeOfIRExpr( pce->bb->tyenv, e );
+         Bool    isWord = e_ty == pce->gWordTy;
+         IRTemp  dst    = st->Ist.WrTmp.tmp;
+         IRTemp  dstv   = isWord ? newShadowTmp( pce, dst )
+                                 : IRTemp_INVALID;
+
+         switch (e->tag) {
+
+            case Iex_Const: {
+               stmt( 'C', pce, st );
+               if (isWord)
+                  assign( 'I', pce, dstv, schemeEw_Atom( pce, e ) );
+               break;
+            }
+
+            case Iex_CCall: {
+               stmt( 'C', pce, st );
+               if (isWord)
+                  assign( 'I', pce, dstv,
+                          mkexpr( gen_call_nonptr_or_unknown_w( 
+                                     pce, mkexpr(dst)))); 
+               break;
+            }
+
+            case Iex_Mux0X: {
+               /* Just steer the shadow values in the same way as the
+                  originals. */
+               stmt( 'C', pce, st );
+               if (isWord)
+                  assign( 'I', pce, dstv, 
+                          IRExpr_Mux0X(
+                             e->Iex.Mux0X.cond,
+                             schemeEw_Atom( pce, e->Iex.Mux0X.expr0 ),
+                             schemeEw_Atom( pce, e->Iex.Mux0X.exprX ) ));
+               break;
+            }
+
+            case Iex_RdTmp: {
+               stmt( 'C', pce, st );
+               if (isWord)
+                  assign( 'I', pce, dstv, schemeEw_Atom( pce, e ));
+               break;
+            }
+
+            case Iex_Load: {
+               IRExpr* addr  = e->Iex.Load.addr;
+               HChar*  h_nm  = NULL;
+               void*   h_fn  = NULL;
+               IRExpr* addrv = NULL;
+               if (pce->gWordTy == Ity_I32) {
+                  /* 32 bit host/guest (cough, cough) */
+                  switch (e_ty) {
+                     /* Ity_I32: helper returns shadow value. */
+                     case Ity_I32:  h_fn = &check_load4_P;
+                                    h_nm = "check_load4_P"; break;
+                     /* all others: helper does not return a shadow
+                        value. */
+                     case Ity_V128: h_fn = &check_load16;
+                                    h_nm = "check_load16"; break;
+                     case Ity_I64:
+                     case Ity_F64:  h_fn = &check_load8;
+                                    h_nm = "check_load8"; break;
+                     case Ity_F32:  h_fn = &check_load4;
+                                    h_nm = "check_load4"; break;
+                     case Ity_I16:  h_fn = &check_load2;
+                                    h_nm = "check_load2"; break;
+                     case Ity_I8:   h_fn = &check_load1;
+                                    h_nm = "check_load1"; break;
+                     default: ppIRType(e_ty); tl_assert(0);
+                  }
+                  addrv = schemeEw_Atom( pce, addr );
+                  if (e_ty == Ity_I32) {
+                     assign( 'I', pce, dstv, 
+                              mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
+                                                           addr, addrv )) );
+                  } else {
+                     gen_dirty_v_WW( pce, h_fn, h_nm, addr, addrv );
+                  }
+               } else {
+                  /* 64 bit host/guest (cough, cough) */
+                  switch (e_ty) {
+                     /* Ity_I64: helper returns shadow value. */
+                     case Ity_I64:  h_fn = &check_load8_P;
+                                    h_nm = "check_load8_P"; break;
+                     /* all others: helper does not return a shadow
+                        value. */
+                     case Ity_V128: h_fn = &check_load16;
+                                    h_nm = "check_load16"; break;
+                     case Ity_F64:  h_fn = &check_load8;
+                                    h_nm = "check_load8"; break;
+                     case Ity_F32:
+                     case Ity_I32:  h_fn = &check_load4;
+                                    h_nm = "check_load4"; break;
+                     case Ity_I16:  h_fn = &check_load2;
+                                    h_nm = "check_load2"; break;
+                     case Ity_I8:   h_fn = &check_load1;
+                                    h_nm = "check_load1"; break;
+                     default: ppIRType(e_ty); tl_assert(0);
+                  }
+                  addrv = schemeEw_Atom( pce, addr );
+                  if (e_ty == Ity_I64) {
+                     assign( 'I', pce, dstv, 
+                              mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
+                                                           addr, addrv )) );
+                  } else {
+                     gen_dirty_v_WW( pce, h_fn, h_nm, addr, addrv );
+                  }
+               }
+               /* copy the original -- must happen after the helper call */
+               stmt( 'C', pce, st );
+               break;
+            }
+
+            case Iex_GetI: {
+               IRRegArray* descr = e->Iex.GetI.descr;
+               stmt( 'C', pce, st );
+               tl_assert(descr && descr->elemTy);
+               if (is_integer_guest_reg_array(descr)) {
+                  /* if this fails, is_integer_guest_reg_array is
+                     returning bogus results */
+                  tl_assert(isWord);
+                  assign(
+                     'I', pce, dstv,
+                     IRExpr_GetI(
+                        mkIRRegArray(descr->base + pce->guest_state_sizeB,
+                                     descr->elemTy, descr->nElems),
+                        e->Iex.GetI.ix,
+                        e->Iex.GetI.bias
+                     )
+                  );
+               }
+               break;
+            }
+
+            case Iex_Get: {
+               stmt( 'C', pce, st );
+               if (isWord) {
+                  /* guest-word-typed tmp assignment, so it will have a
+                     shadow tmp, and we must make an assignment to
+                     that */
+                  if (is_integer_guest_reg(e->Iex.Get.offset,
+                                           sizeofIRType(e->Iex.Get.ty))) {
+                     assign( 'I', pce, dstv,
+                             IRExpr_Get( e->Iex.Get.offset 
+                                            + pce->guest_state_sizeB,
+                                         e->Iex.Get.ty) );
+                  } else {
+                     if (pce->hWordTy == Ity_I32) {
+                        assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
+                     } else {
+                       assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
+                     }
+                  }
+               } else {
+                  /* tmp isn't guest-word-typed, so isn't shadowed, so
+                     generate no instrumentation */
+               }
+               break;
+            }
+
+            case Iex_Unop: {
+               stmt( 'C', pce, st );
+               tl_assert(isIRAtom(e->Iex.Unop.arg));
+               if (isWord)
+                  instrument_arithop( pce, dst, dstv, e->Iex.Unop.op,
+                                      e->Iex.Unop.arg,
+                                      NULL, NULL, NULL );
+               break;
+            }
+
+            case Iex_Binop: {
+               stmt( 'C', pce, st );
+               tl_assert(isIRAtom(e->Iex.Binop.arg1));
+               tl_assert(isIRAtom(e->Iex.Binop.arg2));
+               if (isWord)
+                  instrument_arithop( pce, dst, dstv, e->Iex.Binop.op,
+                                      e->Iex.Binop.arg1, e->Iex.Binop.arg2,
+                                      NULL, NULL );
+               break;
+            }
+
+            case Iex_Triop: {
+               stmt( 'C', pce, st );
+               tl_assert(isIRAtom(e->Iex.Triop.arg1));
+               tl_assert(isIRAtom(e->Iex.Triop.arg2));
+               tl_assert(isIRAtom(e->Iex.Triop.arg3));
+               if (isWord)
+                  instrument_arithop( pce, dst, dstv, e->Iex.Triop.op,
+                                      e->Iex.Triop.arg1, e->Iex.Triop.arg2,
+                                      e->Iex.Triop.arg3, NULL );
+               break;
+            }
+
+            case Iex_Qop: {
+               stmt( 'C', pce, st );
+               tl_assert(isIRAtom(e->Iex.Qop.arg1));
+               tl_assert(isIRAtom(e->Iex.Qop.arg2));
+               tl_assert(isIRAtom(e->Iex.Qop.arg3));
+               tl_assert(isIRAtom(e->Iex.Qop.arg4));
+               if (isWord)
+                  instrument_arithop( pce, dst, dstv, e->Iex.Qop.op,
+                                      e->Iex.Qop.arg1, e->Iex.Qop.arg2,
+                                      e->Iex.Qop.arg3, e->Iex.Qop.arg4 );
+               break;
+            }
+
+            default:
+               goto unhandled;
+         } /* switch (e->tag) */
+
+         break;
+
+      } /* case Ist_WrTmp */
+
+      default:
+      unhandled:
+         ppIRStmt(st);
+         tl_assert(0);
+   }
+}
+
+
+/* Top-level instrumentation entry point for Ptrcheck.  Builds a copy
+   of 'sbIn' carrying both sg_ (stack/global checking) and h_ (heap
+   checking) instrumentation.  Panics if guest and host word sizes
+   differ, since that combination is not supported.  Returns the new,
+   instrumented superblock. */
+IRSB* h_instrument ( VgCallbackClosure* closure,
+                     IRSB* sbIn,
+                     VexGuestLayout* layout,
+                     VexGuestExtents* vge,
+                     IRType gWordTy, IRType hWordTy )
+{
+   Bool  verboze = 0||False;
+   Int   i /*, j*/;
+   PCEnv pce;
+   struct _SGEnv* sgenv;
+
+   if (gWordTy != hWordTy) {
+      /* We don't currently support this case. */
+      VG_(tool_panic)("host/guest word size mismatch");
+   }
+
+   /* Check we're not completely nuts */
+   tl_assert(sizeof(UWord)  == sizeof(void*));
+   tl_assert(sizeof(Word)   == sizeof(void*));
+   tl_assert(sizeof(Addr)   == sizeof(void*));
+   tl_assert(sizeof(ULong)  == 8);
+   tl_assert(sizeof(Long)   == 8);
+   tl_assert(sizeof(Addr64) == 8);
+   tl_assert(sizeof(UInt)   == 4);
+   tl_assert(sizeof(Int)    == 4);
+
+   /* Set up the running environment.  Only .bb is modified as we go
+      along. */
+   pce.bb                = deepCopyIRSBExceptStmts(sbIn);
+   pce.trace             = verboze;
+   pce.n_originalTmps    = sbIn->tyenv->types_used;
+   pce.hWordTy           = hWordTy;
+   pce.gWordTy           = gWordTy;
+   pce.guest_state_sizeB = layout->total_sizeB;
+   pce.tmpMap            = LibVEX_Alloc(pce.n_originalTmps * sizeof(IRTemp));
+   /* No shadow temps exist yet; they are created lazily on demand. */
+   for (i = 0; i < pce.n_originalTmps; i++)
+      pce.tmpMap[i] = IRTemp_INVALID;
+
+   /* Also set up for the sg_ instrumenter.  See comments
+      at the top of this instrumentation section for details. */
+   sgenv = sg_instrument_init();
+
+   /* Stay sane.  These two should agree! */
+   tl_assert(layout->total_sizeB == MC_SIZEOF_GUEST_STATE);
+
+   /* Copy verbatim any IR preamble preceding the first IMark */
+
+   i = 0;
+   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
+      IRStmt* st = sbIn->stmts[i];
+      tl_assert(st);
+      tl_assert(isFlatIRStmt(st));
+      stmt( 'C', &pce, sbIn->stmts[i] );
+      i++;
+   }
+
+   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
+      cause the IR following the preamble to contain references to IR
+      temporaries defined in the preamble.  Because the preamble isn't
+      instrumented, these temporaries don't have any shadows.
+      Nevertheless uses of them following the preamble will cause
+      memcheck to generate references to their shadows.  End effect is
+      to cause IR sanity check failures, due to references to
+      non-existent shadows.  This is only evident for the complex
+      preambles used for function wrapping on TOC-afflicted platforms
+      (ppc64-linux, ppc32-aix5, ppc64-aix5).
+
+      The following loop therefore scans the preamble looking for
+      assignments to temporaries.  For each one found it creates an
+      assignment to the corresponding shadow temp, marking it as
+      'defined'.  This is the same resulting IR as if the main
+      instrumentation loop before had been applied to the statement
+      'tmp = CONSTANT'.
+   */
+#if 0
+   // FIXME: this isn't exactly right; only needs to generate shadows
+   // for guest-word-typed temps
+   for (j = 0; j < i; j++) {
+      if (sbIn->stmts[j]->tag == Ist_WrTmp) {
+         /* findShadowTmpV checks its arg is an original tmp;
+            no need to assert that here. */
+         IRTemp tmp_o = sbIn->stmts[j]->Ist.WrTmp.tmp;
+         IRTemp tmp_s = findShadowTmp(&pce, tmp_o);
+         IRType ty_s  = typeOfIRTemp(sbIn->tyenv, tmp_s);
+         assign( 'V', &pce, tmp_s, definedOfType( ty_s ) );
+         if (0) {
+            VG_(printf)("create shadow tmp for preamble tmp [%d] ty ", j);
+            ppIRType( ty_s );
+            VG_(printf)("\n");
+         }
+      }
+   }
+#endif
+
+   /* Iterate over the remaining stmts to generate instrumentation. */
+
+   tl_assert(sbIn->stmts_used > 0);
+   tl_assert(i >= 0);
+   tl_assert(i < sbIn->stmts_used);
+   tl_assert(sbIn->stmts[i]->tag == Ist_IMark);
+
+   for (/*use current i*/; i < sbIn->stmts_used; i++) {
+      /* generate sg_ instrumentation for this stmt */
+      sg_instrument_IRStmt( sgenv, pce.bb, sbIn->stmts[i],
+                            layout, gWordTy, hWordTy );
+      /* generate h_ instrumentation for this stmt */
+      schemeS( &pce, sbIn->stmts[i] );
+   }
+
+   /* generate sg_ instrumentation for the final jump */
+   sg_instrument_final_jump( sgenv, pce.bb, sbIn->next, sbIn->jumpkind,
+                             layout, gWordTy, hWordTy );
+
+   /* and finalise .. */
+   sg_instrument_fini( sgenv );
+
+   return pce.bb;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- Initialisation                                               ---*/
+/*--------------------------------------------------------------------*/
+
+/* h_ subtool init, run before command line processing: set up the
+   shadow memory and the lossage-stats table. */
+void h_pre_clo_init ( void )
+{
+   // Other initialisation
+   init_shadow_memory();
+   init_lossage();
+}
+
+/* h_ subtool init, run after command line processing: currently
+   nothing to do. */
+void h_post_clo_init ( void )
+{
+}
+
+/*--------------------------------------------------------------------*/
+/*--- Finalisation                                                 ---*/
+/*--------------------------------------------------------------------*/
+
+/* h_ subtool finalisation: print allocation/segment stats at
+   verbosity >= 2 and, when --lossage-check=yes, the lossage summary
+   gathered during the run.  Otherwise assert the lossage table
+   stayed empty. */
+void h_fini ( Int exitcode )
+{
+   if (VG_(clo_verbosity) >= 2) {
+      VG_(message)(Vg_DebugMsg,
+                   "  h_:  %'10llu client allocs, %'10llu client frees", 
+                   stats__client_mallocs, stats__client_frees);
+      VG_(message)(Vg_DebugMsg,
+                   "  h_:  %'10llu Segs allocd,   %'10llu Segs recycled", 
+                   stats__segs_allocd, stats__segs_recycled);
+      VG_(message)(Vg_DebugMsg,
+                   "  h_:  %'10llu slow searches, %'10llu total cmps",
+                   stats__slow_searches, stats__slow_totcmps);
+
+   }
+
+   if (h_clo_lossage_check) {
+      VG_(message)(Vg_UserMsg, "");
+      VG_(message)(Vg_UserMsg, "%12lld total memory references",
+                               stats__tot_mem_refs);
+      VG_(message)(Vg_UserMsg, "%12lld   of which are in a known segment",
+                               stats__refs_in_a_seg);
+      VG_(message)(Vg_UserMsg, "%12lld   of which are 'lost' w.r.t the seg",
+                               stats__refs_lost_seg);
+      VG_(message)(Vg_UserMsg, "");
+      show_lossage();
+      VG_(message)(Vg_UserMsg, "");
+   } else {
+      /* lossage is only collected under --lossage-check=yes, so the
+         OSet must be empty here. */
+      tl_assert( 0 == VG_(OSetGen_Size)(lossage) );
+   }
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                 h_main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/h_main.h b/exp-ptrcheck/h_main.h
new file mode 100644
index 0000000..84efddc
--- /dev/null
+++ b/exp-ptrcheck/h_main.h
@@ -0,0 +1,98 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- Exports for heap access checking.                            ---*/
+/*---                                                     h_main.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2003-2008 Nicholas Nethercote
+      njn@valgrind.org
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __H_MAIN_H
+
+#define __H_MAIN_H
+
+// Choose values that couldn't possibly be pointers
+#define NONPTR          ((Seg*)0xA1)
+#define UNKNOWN         ((Seg*)0xB2)
+#define BOTTOM          ((Seg*)0xC3)
+
+// True iff 'teg' denotes a real segment, i.e. is none of the special
+// sentinel values NONPTR/UNKNOWN/BOTTOM defined above.
+static inline Bool is_known_segment(Seg* teg) {
+   return (UNKNOWN != teg && BOTTOM != teg && NONPTR != teg);
+   // better?  teg <= BOTTOM
+}
+
+void        Seg__cmp(Seg* seg, Addr a, Int* cmp, UWord* n);
+Bool        Seg__is_freed(Seg* seg);
+ExeContext* Seg__where(Seg* seg);
+SizeT       Seg__size(Seg* seg);
+Addr        Seg__addr(Seg* seg);
+
+void h_pre_clo_init ( void );
+void h_post_clo_init ( void );
+void h_fini ( Int exitcode );
+
+void* h_replace_malloc ( ThreadId tid, SizeT n );
+void* h_replace___builtin_new ( ThreadId tid, SizeT n );
+void* h_replace___builtin_vec_new ( ThreadId tid, SizeT n );
+void* h_replace_memalign ( ThreadId tid, SizeT align, SizeT n );
+void* h_replace_calloc ( ThreadId tid, SizeT nmemb, SizeT size1 );
+void  h_replace_free ( ThreadId tid, void* p );
+void  h_replace___builtin_delete ( ThreadId tid, void* p );
+void  h_replace___builtin_vec_delete ( ThreadId tid, void* p );
+void* h_replace_realloc ( ThreadId tid, void* p_old, SizeT new_size );
+
+void h_new_mem_startup( Addr a, SizeT len,
+                        Bool rr, Bool ww, Bool xx, ULong di_handle );
+void h_new_mem_mmap( Addr a, SizeT len,
+                     Bool rr, Bool ww, Bool xx, ULong di_handle );
+void h_die_mem_munmap( Addr a, SizeT len );
+void h_pre_mem_access ( CorePart part, ThreadId tid, Char* s,
+                        Addr base, SizeT size );
+void h_pre_mem_read_asciiz ( CorePart part, ThreadId tid, 
+                             Char* s, Addr lo );
+
+void h_post_reg_write_demux ( CorePart part, ThreadId tid,
+                              OffT guest_state_offset, SizeT size);
+void h_post_reg_write_clientcall(ThreadId tid, OffT guest_state_offset,
+                                 SizeT size, Addr f );
+
+void h_pre_syscall ( ThreadId tid, UInt syscallno );
+void h_post_syscall ( ThreadId tid, UInt syscallno, SysRes res );
+
+/* Note that this also does the sg_ instrumentation. */
+IRSB* h_instrument ( VgCallbackClosure* closure,
+                     IRSB* sbIn,
+                     VexGuestLayout* layout,
+                     VexGuestExtents* vge,
+                     IRType gWordTy, IRType hWordTy );
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                 h_main.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/pc_common.c b/exp-ptrcheck/pc_common.c
new file mode 100644
index 0000000..a08f0f1
--- /dev/null
+++ b/exp-ptrcheck/pc_common.c
@@ -0,0 +1,543 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- Provides stuff shared between sg_ and h_ subtools.           ---*/
+/*---                                                  pc_common.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+
+   Neither the names of the U.S. Department of Energy nor the
+   University of California nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without prior written permission.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_options.h"
+#include "pub_tool_replacemalloc.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_tooliface.h"    // CorePart
+#include "pub_tool_threadstate.h"  // VG_(get_running_tid)
+#include "pub_tool_debuginfo.h"
+
+#include "pc_common.h"   // self, & Seg
+
+#include "h_main.h"      // NONPTR, BOTTOM, UNKNOWN
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Command line options                                     //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+// User-visible: tolerate partially valid loads, as Memcheck does.
+Bool h_clo_partial_loads_ok = True;   /* user visible */
+// Developer-only: gather segment-lookup quality stats (see h_fini).
+Bool h_clo_lossage_check    = False;  /* dev flag only */
+
+/* Process one tool-specific command line argument.  Returns True if
+   the argument was recognised (either directly or by the
+   replacement-malloc machinery). */
+Bool pc_process_cmd_line_options(Char* arg)
+{
+        VG_BOOL_CLO(arg, "--partial-loads-ok", h_clo_partial_loads_ok)
+   else VG_BOOL_CLO(arg, "--lossage-check",    h_clo_lossage_check)
+   else
+      return VG_(replacement_malloc_process_cmd_line_option)(arg);
+
+   return True;
+}
+
+/* Print the tool's user-visible command line options. */
+void pc_print_usage(void)
+{
+   VG_(printf)(
+   "    --partial-loads-ok=no|yes same as for Memcheck [yes]\n"
+   );
+   VG_(replacement_malloc_print_usage)();
+}
+
+/* Print the tool's debugging-only command line options. */
+void pc_print_debug_usage(void)
+{
+   VG_(printf)(
+   "    --lossage-check=no|yes gather stats for quality control [no]\n"
+   );
+   VG_(replacement_malloc_print_debug_usage)();
+}
+
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Error management                                         //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* What kind of error it is. */
+typedef
+   enum {
+      XE_SorG=1202, // sg: stack or global array inconsistency
+      XE_Heap,      // h: mismatched ptr/addr segments on load/store
+      XE_Arith,     // h: bad arithmetic between two segment pointers
+      XE_SysParam   // h: block straddling >1 segment passed to syscall
+   }
+   XErrorTag;
+
+/* Suppression kinds, mirroring XErrorTag one-for-one. */
+typedef
+   enum {
+      XS_SorG=2021,
+      XS_Heap,
+      XS_Arith,
+      XS_SysParam
+   }
+   XSuppTag;
+
+/* The 'extra' payload attached to each recorded error; 'tag' selects
+   which union arm is valid. */
+typedef
+   struct {
+      XErrorTag tag;
+      union {
+         struct {
+            Addr   addr;
+            SSizeT sszB;  /* -ve is write, +ve is read */
+            HChar  expect[128];  /* description of the expected block */
+            HChar  actual[128];  /* description of the actual block */
+         } SorG;
+         struct {
+            Addr   addr;
+            SSizeT sszB;  /* -ve is write, +ve is read */
+            Seg*   vseg;  /* segment the accessing pointer derives from */
+            Char   descr1[96];   /* filled in by pc_update_Error_extra */
+            Char   descr2[96];   /* filled in by pc_update_Error_extra */
+            Char   datasym[96];  /* data symbol containing addr, if any */
+            OffT   datasymoff;   /* offset of addr within that symbol */
+         } Heap;
+         struct {
+            Seg* seg1;
+            Seg* seg2;
+            const HChar* opname; // user-understandable text name
+         } Arith;
+         struct {
+            CorePart part;
+            Addr lo;     /* first byte of the offending block */
+            Addr hi;     /* last byte of the offending block */
+            Seg* seglo;  /* segment containing lo, possibly UNKNOWN */
+            Seg* seghi;  /* segment containing hi, possibly UNKNOWN */
+         } SysParam;
+      } XE;
+   }
+   XError;
+
+
+/* Record a stack-or-global (sg_) inconsistency error against 'tid'.
+   'expect'/'actual' are human-readable block descriptions; both are
+   copied (truncated and forcibly NUL-terminated) into the XError, so
+   the caller's buffers need not outlive this call. */
+void sg_record_error_SorG ( ThreadId tid,
+                            Addr addr, SSizeT sszB,
+                            HChar* expect, HChar* actual )
+{
+   XError xe;
+   VG_(memset)(&xe, 0, sizeof(xe));
+   xe.tag = XE_SorG;
+   xe.XE.SorG.addr = addr;
+   xe.XE.SorG.sszB = sszB;
+   VG_(strncpy)( &xe.XE.SorG.expect[0],
+                 expect, sizeof(xe.XE.SorG.expect) );
+   VG_(strncpy)( &xe.XE.SorG.actual[0],
+                 actual, sizeof(xe.XE.SorG.actual) );
+   /* strncpy does not guarantee termination on truncation, so force it. */
+   xe.XE.SorG.expect[ sizeof(xe.XE.SorG.expect)-1 ] = 0;
+   xe.XE.SorG.actual[ sizeof(xe.XE.SorG.actual)-1 ] = 0;
+   VG_(maybe_record_error)( tid, XE_SorG, 0, NULL, &xe );
+}
+
+/* Record an invalid heap access of 'size' bytes at 'a', made via a
+   pointer derived from segment 'vseg'.  The access direction is
+   encoded in the sign of sszB: negative = write, positive = read. */
+void h_record_heap_error( Addr a, SizeT size, Seg* vseg, Bool is_write )
+{
+   XError xe;
+   tl_assert(size > 0);
+   VG_(memset)(&xe, 0, sizeof(xe));
+   xe.tag = XE_Heap;
+   xe.XE.Heap.sszB = is_write ? -size : size;
+   xe.XE.Heap.addr = a;
+   xe.XE.Heap.vseg = vseg;
+   VG_(maybe_record_error)( VG_(get_running_tid)(), XE_Heap,
+                            /*a*/0, /*str*/NULL, /*extra*/(void*)&xe);
+}
+
+/* Record bad pointer arithmetic between seg1 and seg2.  'opname' is a
+   user-understandable operation name; it is stored by reference, so
+   it must be a literal or otherwise long-lived. */
+void h_record_arith_error( Seg* seg1, Seg* seg2, HChar* opname )
+{
+   XError xe;
+   VG_(memset)(&xe, 0, sizeof(xe));
+   xe.tag = XE_Arith;
+   xe.XE.Arith.seg1   = seg1;
+   xe.XE.Arith.seg2   = seg2;
+   xe.XE.Arith.opname = opname;
+   VG_(maybe_record_error)( VG_(get_running_tid)(), XE_Arith,
+                            /*a*/0, /*str*/NULL, /*extra*/(void*)&xe);
+}
+
+/* Record a syscall-param error: the block [lo,hi] passed to a syscall
+   is not wholly inside one live segment.  'seglo'/'seghi' are the
+   segments containing the first and last bytes; 's' names the
+   syscall parameter and is stored via the error's string field. */
+void h_record_sysparam_error( ThreadId tid, CorePart part, Char* s,
+                              Addr lo, Addr hi, Seg* seglo, Seg* seghi )
+{
+   XError xe;
+   VG_(memset)(&xe, 0, sizeof(xe));
+   xe.tag = XE_SysParam;
+   xe.XE.SysParam.part = part;
+   xe.XE.SysParam.lo = lo;
+   xe.XE.SysParam.hi = hi;
+   xe.XE.SysParam.seglo = seglo;
+   xe.XE.SysParam.seghi = seghi;
+   VG_(maybe_record_error)( tid, XE_SysParam, /*a*/(Addr)0, /*str*/s,
+                            /*extra*/(void*)&xe);
+}
+
+
+/* Error-equality callback used by the core to fold duplicate errors.
+   The two errors are already known to be of the same kind.  SorG
+   errors additionally compare size and the expect/actual description
+   strings (the address is deliberately ignored); Heap, Arith and
+   SysParam errors of the same kind always compare equal. */
+Bool pc_eq_Error ( VgRes res, Error* e1, Error* e2 )
+{
+   XError *xe1, *xe2;
+   tl_assert(VG_(get_error_kind)(e1) == VG_(get_error_kind)(e2));
+   //tl_assert(VG_(get_error_string)(e1) == NULL);
+   //tl_assert(VG_(get_error_string)(e2) == NULL);
+
+   xe1 = (XError*)VG_(get_error_extra)(e1);
+   xe2 = (XError*)VG_(get_error_extra)(e2);
+   tl_assert(xe1);
+   tl_assert(xe2);
+
+   if (xe1->tag != xe2->tag)
+      return False;
+
+   switch (xe1->tag) {
+      case XE_SorG:
+         return //xe1->XE.SorG.addr == xe2->XE.SorG.addr
+                //&& 
+                xe1->XE.SorG.sszB == xe2->XE.SorG.sszB
+                && 0 == VG_(strncmp)( &xe1->XE.SorG.expect[0],
+                                      &xe2->XE.SorG.expect[0],
+                                      sizeof(xe1->XE.SorG.expect) ) 
+                && 0 == VG_(strncmp)( &xe1->XE.SorG.actual[0],
+                                      &xe2->XE.SorG.actual[0],
+                                      sizeof(xe1->XE.SorG.actual) );
+      case XE_Heap:
+      case XE_Arith:
+      case XE_SysParam:
+         return True;
+      default:
+         VG_(tool_panic)("eq_Error: unrecognised error kind");
+   }
+}
+
+
+/* Access-direction name from the signed-size convention:
+   negative = "write", non-negative = "read". */
+static Char* readwrite(SSizeT sszB)
+{
+   return ( sszB < 0 ? "write" : "read" );
+}
+
+/* Absolute value of a signed Word. */
+static Word Word__abs ( Word w ) {
+   return w < 0 ? -w : w;
+}
+
+/* Print one recorded error for the user.  Dispatches on the error
+   kind; for Heap errors it relies on pc_update_Error_extra having
+   already filled in the descr1/descr2/datasym fields. */
+void pc_pp_Error ( Error* err )
+{
+   XError *xe = (XError*)VG_(get_error_extra)(err);
+   tl_assert(xe);
+
+   switch (VG_(get_error_kind)(err)) {
+
+   //----------------------------------------------------------
+   case XE_SorG:
+      tl_assert(xe);
+      VG_(message)(Vg_UserMsg, "Invalid %s of size %ld", 
+                               xe->XE.SorG.sszB < 0 ? "write" : "read",
+                               Word__abs(xe->XE.SorG.sszB) );
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+      VG_(message)(Vg_UserMsg, " Address %#lx expected vs actual:",
+                               xe->XE.SorG.addr);
+      VG_(message)(Vg_UserMsg, " Expected: %s", &xe->XE.SorG.expect[0] );
+      VG_(message)(Vg_UserMsg, " Actual:   %s", &xe->XE.SorG.actual[0] );
+      break;
+
+   //----------------------------------------------------------
+   case XE_Heap: {
+      Char *place, *legit, *how_invalid;
+      Addr a    = xe->XE.Heap.addr;
+      Seg* vseg = xe->XE.Heap.vseg;
+
+      tl_assert(is_known_segment(vseg) || NONPTR == vseg);
+
+      if (NONPTR == vseg) {
+         // Access via a non-pointer
+         VG_(message)(Vg_UserMsg, "Invalid %s of size %ld",
+                                   readwrite(xe->XE.Heap.sszB),
+                                   Word__abs(xe->XE.Heap.sszB));
+         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+         VG_(message)(Vg_UserMsg,
+                      " Address %#lx is not derived from any known block", a);
+
+      } else {
+         // Access via a pointer, but outside its range.
+         Int cmp;
+         UWord miss_size;
+         Seg__cmp(vseg, a, &cmp, &miss_size);
+         if      (cmp  < 0) place = "before";
+         else if (cmp == 0) place = "inside";
+         else               place = "after";
+         /* "Doubly-invalid": both out of range AND in a freed block. */
+         how_invalid = ( ( Seg__is_freed(vseg) && 0 != cmp )
+                       ? "Doubly-invalid" : "Invalid" );
+         legit = ( Seg__is_freed(vseg) ? "once-" : "" );
+
+         VG_(message)(Vg_UserMsg, "%s %s of size %ld", how_invalid,
+                                  readwrite(xe->XE.Heap.sszB),
+                                  Word__abs(xe->XE.Heap.sszB));
+         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+
+         VG_(message)(Vg_UserMsg,
+                      " Address %#lx is %lu bytes %s the accessing pointer's",
+                      a, miss_size, place);
+         VG_(message)(Vg_UserMsg,
+                      " %slegitimate range, a block of size %lu %s",
+                      legit, Seg__size(vseg),
+                      Seg__is_freed(vseg) ? "free'd" : "alloc'd" );
+         VG_(pp_ExeContext)(Seg__where(vseg));
+      }
+      /* Extra address descriptions, if pc_update_Error_extra found any. */
+      if (xe->XE.Heap.descr1[0] != 0)
+         VG_(message)(Vg_UserMsg, " %s", xe->XE.Heap.descr1);
+      if (xe->XE.Heap.descr2[0] != 0)
+         VG_(message)(Vg_UserMsg, " %s", xe->XE.Heap.descr2);
+      if (xe->XE.Heap.datasym[0] != 0)
+         VG_(message)(Vg_UserMsg, " Address 0x%llx is %llu bytes "
+                      "inside data symbol \"%s\"",
+                      (ULong)xe->XE.Heap.addr,
+                      (ULong)xe->XE.Heap.datasymoff,
+                      xe->XE.Heap.datasym);
+      break;
+   }
+
+   //----------------------------------------------------------
+   case XE_Arith: {
+      Seg*   seg1   = xe->XE.Arith.seg1;
+      Seg*   seg2   = xe->XE.Arith.seg2;
+      Char*  which;
+
+      tl_assert(BOTTOM != seg1);
+      tl_assert(BOTTOM != seg2 && UNKNOWN != seg2);
+
+      VG_(message)(Vg_UserMsg, "Invalid arguments to %s", xe->XE.Arith.opname);
+      VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+
+      if (seg1 != seg2) {
+         if (NONPTR == seg1) {
+            VG_(message)(Vg_UserMsg, " First arg not a pointer");
+         } else if (UNKNOWN == seg1) {
+            VG_(message)(Vg_UserMsg, " First arg may be a pointer");
+         } else {
+            VG_(message)(Vg_UserMsg, " First arg derived from address %#lx of "
+                                     "%lu-byte block alloc'd",
+                                     Seg__addr(seg1), Seg__size(seg1) );
+            VG_(pp_ExeContext)(Seg__where(seg1));
+         }
+         which = "Second arg";
+      } else {
+         which = "Both args";
+      }
+      if (NONPTR == seg2) {
+         VG_(message)(Vg_UserMsg, " %s not a pointer", which);
+      } else {
+         VG_(message)(Vg_UserMsg, " %s derived from address %#lx of "
+                                  "%lu-byte block alloc'd",
+                      which, Seg__addr(seg2), Seg__size(seg2) );
+         VG_(pp_ExeContext)(Seg__where(seg2));
+      }
+      break;
+   }
+
+   //----------------------------------------------------------
+   case XE_SysParam: {
+      Addr  lo    = xe->XE.SysParam.lo;
+      Addr  hi    = xe->XE.SysParam.hi;
+      Seg*  seglo = xe->XE.SysParam.seglo;
+      Seg*  seghi = xe->XE.SysParam.seghi;
+      Char* s     = VG_(get_error_string) (err);
+      Char* what;
+
+      tl_assert(BOTTOM != seglo && BOTTOM != seghi);
+
+      if      (Vg_CoreSysCall == xe->XE.SysParam.part) 
+                 what = "Syscall param ";
+      else    VG_(tool_panic)("bad CorePart");
+
+      if (seglo == seghi) {
+         // freed block
+         tl_assert(is_known_segment(seglo));
+         tl_assert(Seg__is_freed(seglo)); // XXX what if it's now recycled?
+         VG_(message)(Vg_UserMsg, "%s%s contains unaddressable byte(s)",
+                                  what, s);
+         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+
+         VG_(message)(Vg_UserMsg, " Address %#lx is %ld bytes inside a "
+                                  "%ld-byte block alloc'd",
+                                  lo, lo-Seg__addr(seglo),
+                                  Seg__size(seglo) );
+         VG_(pp_ExeContext)(Seg__where(seglo));
+
+      } else {
+         // mismatch
+         VG_(message)(Vg_UserMsg, "%s%s is non-contiguous", what, s);
+         VG_(pp_ExeContext)( VG_(get_error_where)(err) );
+
+         if (UNKNOWN == seglo) {
+            VG_(message)(Vg_UserMsg, " First byte is not inside a known block");
+         } else {
+            VG_(message)(Vg_UserMsg, " First byte (%#lx) is %ld bytes inside a "
+                                     "%ld-byte block alloc'd",
+                                     lo, lo-Seg__addr(seglo), 
+                                     Seg__size(seglo) );
+            VG_(pp_ExeContext)(Seg__where(seglo));
+         }
+
+         if (UNKNOWN == seghi) {
+            VG_(message)(Vg_UserMsg, " Last byte is not inside a known block");
+         } else {
+            VG_(message)(Vg_UserMsg, " Last byte (%#lx) is %ld bytes inside a "
+                                     "%ld-byte block alloc'd",
+                                     hi, hi-Seg__addr(seghi),
+                                     Seg__size(seghi) );
+            VG_(pp_ExeContext)(Seg__where(seghi));
+         }
+      }
+      break;
+   }
+
+   default:
+      VG_(tool_panic)("pp_Error: unrecognised error kind");
+   }
+}
+
+
+UInt pc_update_Error_extra ( Error* err )
+{
+   XError *xe = (XError*)VG_(get_error_extra)(err);
+   tl_assert(xe);
+   switch (xe->tag) {
+      case XE_SorG:
+         return sizeof(XError);
+      case XE_Heap: {
+         tl_assert(sizeof(xe->XE.Heap.descr1) == sizeof(xe->XE.Heap.descr2));
+         tl_assert(sizeof(xe->XE.Heap.descr1) > 0);
+         tl_assert(sizeof(xe->XE.Heap.datasym) > 0);
+         VG_(memset)(&xe->XE.Heap.descr1, 0, sizeof(xe->XE.Heap.descr1));
+         VG_(memset)(&xe->XE.Heap.descr2, 0, sizeof(xe->XE.Heap.descr2));
+         VG_(memset)(&xe->XE.Heap.datasym, 0, sizeof(xe->XE.Heap.datasym));
+         xe->XE.Heap.datasymoff = 0;
+         if (VG_(get_data_description)( &xe->XE.Heap.descr1[0],
+                                        &xe->XE.Heap.descr2[0],
+                                        sizeof(xe->XE.Heap.descr1)-1,
+                                        xe->XE.Heap.addr )) {
+            tl_assert(xe->XE.Heap.descr1[sizeof(xe->XE.Heap.descr1)-1] == 0);
+            tl_assert(xe->XE.Heap.descr1[sizeof(xe->XE.Heap.descr2)-1] == 0);
+         }
+         else
+         if (VG_(get_datasym_and_offset)( xe->XE.Heap.addr,
+                                          &xe->XE.Heap.datasym[0],
+                                          sizeof(xe->XE.Heap.datasym)-1,
+                                          &xe->XE.Heap.datasymoff )) {
+            tl_assert(xe->XE.Heap.datasym[sizeof(xe->XE.Heap.datasym)-1] == 0);
+         }
+         return sizeof(XError);
+      }
+      case XE_Arith:
+         return sizeof(XError);
+      case XE_SysParam:
+         return sizeof(XError);
+      default:
+         VG_(tool_panic)("update_extra");
+   }
+}
+
+/* Map a suppression-kind name from a suppressions file onto our
+   XSuppTag and store it in 'su'.  Returns False if the name is not
+   one of this tool's kinds. */
+Bool pc_is_recognised_suppression ( Char* name, Supp *su )
+{
+   SuppKind skind;
+
+   if      (VG_STREQ(name, "SorG"))     skind = XS_SorG;
+   else if (VG_STREQ(name, "Heap"))     skind = XS_Heap;
+   else if (VG_STREQ(name, "Arith"))    skind = XS_Arith;
+   else if (VG_STREQ(name, "SysParam")) skind = XS_SysParam;
+   else
+      return False;
+
+   VG_(set_supp_kind)(su, skind);
+   return True;
+}
+
+/* Read any tool-specific extra lines of a suppression entry.  Only
+   SysParam suppressions carry one extra line, which is duplicated
+   into the suppression's string field.  Returns False on premature
+   EOF, True otherwise. */
+Bool pc_read_extra_suppression_info ( Int fd, Char* buf, 
+                                      Int nBuf, Supp* su )
+{
+   Bool eof;
+   if (VG_(get_supp_kind)(su) == XS_SysParam) {
+      eof = VG_(get_line) ( fd, buf, nBuf );
+      if (eof) return False;
+      VG_(set_supp_string)(su, VG_(strdup)("pc.common.presi.1", buf));
+   }
+   return True;
+}
+
+/* Does suppression 'su' apply to error 'err'?  The XS_* suppression
+   kinds map one-to-one onto the XE_* error kinds. */
+Bool pc_error_matches_suppression (Error* err, Supp* su)
+{
+   ErrorKind ekind = VG_(get_error_kind)(err);
+   switch (VG_(get_supp_kind)(su)) {
+      case XS_SorG:     return ekind == XE_SorG;
+      case XS_Heap:     return ekind == XE_Heap;
+      case XS_Arith:    return ekind == XE_Arith;
+      case XS_SysParam: return ekind == XE_SysParam;
+      default:
+         VG_(printf)("Error:\n"
+                     "  unknown suppression type %d\n",
+                     VG_(get_supp_kind)(su));
+         VG_(tool_panic)("unknown suppression type in "
+                         "pc_error_matches_suppression");
+   }
+}
+
+/* Return the suppression-file name for 'err's kind, as recognised by
+   pc_is_recognised_suppression. */
+Char* pc_get_error_name ( Error* err )
+{
+   XError *xe = (XError*)VG_(get_error_extra)(err);
+   tl_assert(xe);
+   switch (xe->tag) {
+      case XE_SorG:     return "SorG";
+      case XE_Heap:     return "Heap";
+      case XE_Arith:    return "Arith";
+      case XE_SysParam: return "SysParam";
+      default:          VG_(tool_panic)("get_error_name: unexpected type");
+   }
+}
+
+/* When printing a suppression template, emit the extra line that
+   SysParam suppressions carry (the syscall param name). */
+void pc_print_extra_suppression_info ( Error* err )
+{
+   if (XE_SysParam == VG_(get_error_kind)(err)) {
+      VG_(printf)("   %s\n", VG_(get_error_string)(err));
+   }
+}
+
+
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              pc_common.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/pc_common.h b/exp-ptrcheck/pc_common.h
new file mode 100644
index 0000000..ac4e4d4
--- /dev/null
+++ b/exp-ptrcheck/pc_common.h
@@ -0,0 +1,72 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- Exports for stuff shared between sg_ and h_ subtools.        ---*/
+/*---                                                  pc_common.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __PC_COMMON_H
+
+#define __PC_COMMON_H
+
+/* Seg is opaque here; only h_main.c sees the full definition. */
+typedef  struct _Seg  Seg;   /* abstract everywhere except in h_main.c */
+
+/* Error-recording entry points used by the sg_ (stack/global) and
+   h_ (heap) subtools. */
+void sg_record_error_SorG ( ThreadId tid,
+                            Addr addr, SSizeT sszB,
+                            HChar* expect, HChar* actual );
+
+void h_record_heap_error( Addr a, SizeT size, Seg* vseg, Bool is_write );
+
+void h_record_arith_error( Seg* seg1, Seg* seg2, HChar* opname );
+
+void h_record_sysparam_error( ThreadId tid, CorePart part, Char* s,
+                              Addr lo, Addr hi, Seg* seglo, Seg* seghi );
+
+/* Error-management callbacks, registered with the core by pc_main.c
+   via VG_(needs_tool_errors). */
+Bool pc_eq_Error ( VgRes res, Error* e1, Error* e2 );
+void pc_pp_Error ( Error* err );
+UInt pc_update_Error_extra ( Error* err );
+Bool pc_is_recognised_suppression ( Char* name, Supp *su );
+Bool pc_read_extra_suppression_info ( Int fd, Char* buf, 
+                                      Int nBuf, Supp* su );
+Bool pc_error_matches_suppression (Error* err, Supp* su);
+Char* pc_get_error_name ( Error* err );
+void pc_print_extra_suppression_info ( Error* err );
+
+/* Command-line flags owned by the h_ subtool. */
+extern Bool h_clo_partial_loads_ok;
+extern Bool h_clo_lossage_check;
+
+/* Command-line handling, registered by pc_main.c via
+   VG_(needs_command_line_options). */
+Bool pc_process_cmd_line_options(Char* arg);
+void pc_print_usage(void);
+void pc_print_debug_usage(void);
+
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                              pc_common.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/pc_main.c b/exp-ptrcheck/pc_main.c
new file mode 100644
index 0000000..deba949
--- /dev/null
+++ b/exp-ptrcheck/pc_main.c
@@ -0,0 +1,202 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- This file coordinates the h_ and sg_ subtools.               ---*/
+/*---                                                    pc_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+
+   Neither the names of the U.S. Department of Energy nor the
+   University of California nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without prior written permission.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_execontext.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_options.h"
+
+//#include "h_list.h"      // Seg
+#include "sg_main.h"
+#include "pc_common.h"
+#include "h_main.h"
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+//                                                          //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// main                                                     //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Tool 'fini' hook: forward to both subtools, heap (h_) first. */
+static void pc_fini ( Int exitcode ) {
+   h_fini( exitcode );
+   sg_fini( exitcode );
+}
+
+/* Stack-shrink event: only the sg_ subtool cares; the h_ call is
+   deliberately disabled (left commented out). */
+static void pc_die_mem_stack ( Addr old_SP, SizeT len ) {
+   /* h_die_mem_stack( old_SP, len ); */
+   sg_die_mem_stack( old_SP, len );
+}
+
+/* Thread-creation event: forwarded to sg_ only; h_ hook disabled. */
+static 
+void pc_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
+   /* h_pre_thread_ll_create(); */
+   sg_pre_thread_ll_create(parent,child);
+}
+
+/* First-instruction-of-thread event: forwarded to sg_ only. */
+static void pc_pre_thread_first_insn ( ThreadId tid ) {
+   /* h_pre_thread_first_insn(tid); */
+   sg_pre_thread_first_insn(tid);
+}
+
+/* New-mmap event: both subtools need to know. */
+static 
+void pc_new_mem_mmap ( Addr a, SizeT len,
+                       Bool rr, Bool ww, Bool xx, ULong di_handle )
+{
+   h_new_mem_mmap(a, len, rr, ww, xx, di_handle);
+   sg_new_mem_mmap(a, len, rr, ww, xx, di_handle);
+}
+
+/* Startup memory-image event: both subtools need to know. */
+static
+void pc_new_mem_startup ( Addr a, SizeT len,
+                          Bool rr, Bool ww, Bool xx, ULong di_handle )
+{
+   h_new_mem_startup(a, len, rr, ww, xx, di_handle);
+   sg_new_mem_startup(a, len, rr, ww, xx, di_handle);
+}
+
+/* Munmap event: both subtools need to know. */
+static void pc_die_mem_munmap ( Addr a, SizeT len ) {
+   h_die_mem_munmap(a, len);
+   sg_die_mem_munmap(a, len);
+}
+
+/* Core is about to read client memory: only the h_ subtool checks
+   it (as a generic access); the sg_ hook is disabled. */
+static void pc_pre_mem_read ( CorePart part, ThreadId tid, Char* s,
+                              Addr base, SizeT size ) {
+   h_pre_mem_access(part, tid, s, base, size );
+   /* sg_pre_mem_read(part, tid, s, base, size); */
+}
+
+/* Core is about to read a NUL-terminated string: h_ only. */
+static void pc_pre_mem_read_asciiz ( CorePart part, ThreadId tid, 
+                                     Char* s, Addr lo )
+{
+   h_pre_mem_read_asciiz(part, tid, s, lo);
+   /* sg_pre_mem_read_asciiz(part, tid, s, lo); */
+}
+
+/* Core is about to write client memory: h_ only, as a generic
+   access check. */
+static void pc_pre_mem_write ( CorePart part, ThreadId tid, Char* s,
+                               Addr base, SizeT size ) {
+   h_pre_mem_access(part, tid, s, base, size);
+   /* sg_pre_mem_write(part, tid, s, base, size); */
+}
+
+/* Post-command-line-processing hook: forward to both subtools. */
+static void pc_post_clo_init ( void )
+{
+   h_post_clo_init();
+   sg_post_clo_init();
+}
+
+/* Register the tool with the core: details, instrumentation entry
+   points, error management, syscall wrappers, command-line handling
+   and memory-event trackers.  Most events are demultiplexed to the
+   h_ (heap) and sg_ (stack/global) subtools by the pc_* wrappers
+   above. */
+static void pc_pre_clo_init(void)
+{
+   VG_(details_name)            ("exp-ptrcheck");
+   VG_(details_version)         (NULL);
+   VG_(details_description)     ("a heap, stack & global array "
+                                 "overrun detector");
+   VG_(details_copyright_author)(
+      "Copyright (C) 2003-2008, and GNU GPL'd, by OpenWorks Ltd et al.");
+   VG_(details_bug_reports_to)  (VG_BUGS_TO);
+
+   /* Instrumentation is done entirely by the h_ subtool. */
+   VG_(basic_tool_funcs)        (pc_post_clo_init,
+                                 h_instrument,
+                                 pc_fini);
+
+   /* Heap allocator interception, handled by h_. */
+   VG_(needs_malloc_replacement)( h_replace_malloc,
+                                  h_replace___builtin_new,
+                                  h_replace___builtin_vec_new,
+                                  h_replace_memalign,
+                                  h_replace_calloc,
+                                  h_replace_free,
+                                  h_replace___builtin_delete,
+                                  h_replace___builtin_vec_delete,
+                                  h_replace_realloc,
+                                  0 /* no need for client heap redzones */ );
+
+   VG_(needs_var_info)          ();
+
+   /* Error management callbacks live in pc_common.c. */
+   VG_(needs_core_errors)       ();
+   VG_(needs_tool_errors)       (pc_eq_Error,
+                                 pc_pp_Error,
+                                 True,/*show TIDs for errors*/
+                                 pc_update_Error_extra,
+                                 pc_is_recognised_suppression,
+                                 pc_read_extra_suppression_info,
+                                 pc_error_matches_suppression,
+                                 pc_get_error_name,
+                                 pc_print_extra_suppression_info);
+
+   VG_(needs_syscall_wrapper)( h_pre_syscall,
+                               h_post_syscall );
+
+   VG_(needs_command_line_options)( pc_process_cmd_line_options,
+                                    pc_print_usage,
+                                    pc_print_debug_usage );
+
+   /* Memory/thread event trackers (see wrappers above for which
+      subtool handles which event). */
+   VG_(track_die_mem_stack)        ( pc_die_mem_stack );
+   VG_(track_pre_thread_ll_create) ( pc_pre_thread_ll_create );
+   VG_(track_pre_thread_first_insn)( pc_pre_thread_first_insn );
+
+   VG_(track_new_mem_mmap)         ( pc_new_mem_mmap );
+   VG_(track_new_mem_startup)      ( pc_new_mem_startup);
+   VG_(track_die_mem_munmap)       ( pc_die_mem_munmap );
+
+   VG_(track_pre_mem_read)         ( pc_pre_mem_read );
+   VG_(track_pre_mem_read_asciiz)  ( pc_pre_mem_read_asciiz );
+   VG_(track_pre_mem_write)        ( pc_pre_mem_write );
+
+   VG_(track_post_reg_write_clientcall_return) ( h_post_reg_write_clientcall );
+   VG_(track_post_reg_write)( h_post_reg_write_demux );
+
+   /* Subtool-private initialisation. */
+   h_pre_clo_init();
+   sg_pre_clo_init();
+
+   /* Set VEX unroll/chase thresholds to zero, disabling loop
+      unrolling and guest-code chasing in the translator. */
+   VG_(clo_vex_control).iropt_unroll_thresh = 0;
+   VG_(clo_vex_control).guest_chase_thresh = 0;
+}
+
+VG_DETERMINE_INTERFACE_VERSION(pc_pre_clo_init)
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                pc_main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/sg_main.c b/exp-ptrcheck/sg_main.c
new file mode 100644
index 0000000..7c0ecb4
--- /dev/null
+++ b/exp-ptrcheck/sg_main.c
@@ -0,0 +1,2418 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- This file checks stack and global array accesses.            ---*/
+/*---                                                    sg_main.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+
+   Neither the names of the U.S. Department of Energy nor the
+   University of California nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without prior written permission.
+*/
+
+#include "pub_tool_basics.h"
+#include "pub_tool_libcbase.h"
+#include "pub_tool_libcassert.h"
+#include "pub_tool_libcprint.h"
+#include "pub_tool_tooliface.h"
+#include "pub_tool_wordfm.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_threadstate.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_machine.h"
+#include "pub_tool_debuginfo.h"
+#include "pub_tool_options.h"
+
+#include "pc_common.h"
+
+#include "sg_main.h"      // self
+
+
+static
+void preen_Invars ( Addr a, SizeT len, Bool isHeap ); /*fwds*/
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Basic Stuff                                              //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Is 'tid' a plausible thread Id?  NOTE(review): if ThreadId is an
+   unsigned type, the 'tid >= 0' test is vacuous -- confirm against
+   the core's definition of ThreadId. */
+static inline Bool is_sane_TId ( ThreadId tid )
+{
+   return tid >= 0 && tid < VG_N_THREADS
+          && tid != VG_INVALID_THREADID;
+}
+
+/* Allocate 'n' (> 0) bytes via the core allocator, tagged with cost
+   centre 'cc'; asserts the allocation succeeded. */
+static void* sg_malloc ( HChar* cc, SizeT n ) {
+   void* res;
+   tl_assert(n > 0);
+   res = VG_(malloc)( cc, n );
+   tl_assert(res);
+   return res;
+}
+
+/* Free 'p' via the core allocator; 'p' must be non-NULL. */
+static void sg_free ( void* p ) {
+   tl_assert(p);
+   VG_(free)(p);
+}
+
+
+/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
+   first interval is lower, 1 if the first interval is higher, and 0
+   if there is any overlap.  Redundant paranoia with casting is there
+   following what looked distinctly like a bug in gcc-4.1.2, in which
+   some of the comparisons were done signedly instead of
+   unsignedly. */
+inline
+static Word cmp_nonempty_intervals ( Addr a1, SizeT n1, 
+                                     Addr a2, SizeT n2 ) {
+   /* Work entirely in UWord; see the comment above about the
+      suspected gcc-4.1.2 signedness bug. */
+   UWord a1w = (UWord)a1;
+   UWord n1w = (UWord)n1;
+   UWord a2w = (UWord)a2;
+   UWord n2w = (UWord)n2;
+   tl_assert(n1w > 0 && n2w > 0);
+   if (a1w + n1w <= a2w) return -1L; /* first entirely below second */
+   if (a2w + n2w <= a1w) return 1L;  /* first entirely above second */
+   return 0;                         /* they overlap */
+}
+
+/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
+   within [aBig,aBig+nBig). */
+inline
+static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
+                                Addr aSmall, SizeT nSmall ) {
+   /* True iff [aSmall, aSmall+nSmall) lies within [aBig, aBig+nBig);
+      both intervals must be non-empty. */
+   tl_assert(nBig > 0 && nSmall > 0);
+   if (aSmall < aBig)
+      return False;
+   return aSmall + nSmall <= aBig + nBig;
+}
+
+inline
+static Addr Addr__min ( Addr a1, Addr a2 ) {
+   /* The smaller of two addresses. */
+   if (a1 < a2) return a1;
+   return a2;
+}
+
+inline
+static Addr Addr__max ( Addr a1, Addr a2 ) {
+   /* The larger of two addresses. */
+   if (a1 < a2) return a2;
+   return a1;
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// StackBlocks Persistent Cache                             //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* We maintain a set of XArray* of StackBlocks.  These are never
+   freed.  When a new StackBlock vector is acquired from
+   VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
+   If not present, it is added.  If present, the just-acquired one is
+   freed and the copy used.
+
+   This simplifies storage management elsewhere.  It allows us to
+   assume that a pointer to an XArray* of StackBlock is valid forever.
+   It also means there are no duplicates anywhere, which could be
+   important from a space point of view for programs that generate a
+   lot of translations, or where translations are frequently discarded
+   and re-made.
+
+   Note that we normalise the arrays by sorting the elements according
+   to an arbitrary total order, so as to avoid the situation that two
+   vectors describe the same set of variables but are not structurally
+   identical. */
+
+/* Sanity: the name is NUL-terminated and both Bool fields hold a
+   genuine False/True value. */
+static inline Bool StackBlock__sane ( StackBlock* fb )
+{
+   return fb->name[ sizeof(fb->name)-1 ] == 0
+          && (fb->spRel == False || fb->spRel == True)
+          && (fb->isVec == False || fb->isVec == True);
+}
+
+/* Generate an arbitrary total ordering on StackBlocks. */
+/* Returns -1/0/1, giving a total order over all StackBlock fields. */
+static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
+{
+   Word r;
+   tl_assert(StackBlock__sane(fb1));
+   tl_assert(StackBlock__sane(fb2));
+   /* Hopefully the .base test hits most of the time.  For the blocks
+      associated with any particular instruction, if the .base values
+      are the same then probably it doesn't make sense for the other
+      fields to be different.  But this is supposed to be a completely
+      general structural total order, so we have to compare everything
+      anyway. */
+   if (fb1->base < fb2->base) return -1;
+   if (fb1->base > fb2->base) return 1;
+   /* compare sizes */
+   if (fb1->szB < fb2->szB) return -1;
+   if (fb1->szB > fb2->szB) return 1;
+   /* compare sp/fp flag */
+   if (fb1->spRel < fb2->spRel) return -1;
+   if (fb1->spRel > fb2->spRel) return 1;
+   /* compare is/is-not array-typed flag */
+   if (fb1->isVec < fb2->isVec) return -1;
+   if (fb1->isVec > fb2->isVec) return 1;
+   /* compare the name */
+   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
+   return r;
+}
+
+/* Returns True if all fields except .szB are the same.  szBs may or
+   may not be the same; they are simply not consulted. */
+/* True if the two blocks agree on every field other than .szB
+   (which is simply not consulted). */
+static Bool StackBlock__all_fields_except_szB_are_equal ( 
+               StackBlock* fb1,
+               StackBlock* fb2 
+            )
+{
+   tl_assert(StackBlock__sane(fb1));
+   tl_assert(StackBlock__sane(fb2));
+   if (fb1->base  != fb2->base)  return False;
+   if (fb1->spRel != fb2->spRel) return False;
+   if (fb1->isVec != fb2->isVec) return False;
+   return 0 == VG_(strcmp)(fb1->name, fb2->name);
+}
+
+
+/* Generate an arbitrary total ordering on vectors of StackBlocks. */
+static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
+{
+   Word i, r, n1, n2;
+   n1 = VG_(sizeXA)( fb1s );
+   n2 = VG_(sizeXA)( fb2s );
+   /* Shorter vector orders first. */
+   if (n1 < n2) return -1;
+   if (n1 > n2) return 1;
+   /* Same length: compare element-wise. */
+   for (i = 0; i < n1; i++) {
+      StackBlock *fb1, *fb2;
+      fb1 = VG_(indexXA)( fb1s, i );
+      fb2 = VG_(indexXA)( fb2s, i );
+      r = StackBlock__cmp( fb1, fb2 );
+      if (r != 0) return r;
+   }
+   tl_assert(i == n1 && i == n2);
+   return 0;
+}
+
+/* Print one StackBlock, without a trailing newline. */
+static void pp_StackBlock ( StackBlock* sb )
+{
+   VG_(printf)("StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }",
+               sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
+               sb->isVec ? 'Y' : 'N', &sb->name[0] );
+}
+
+/* Print every StackBlock in 'sbs', one per line, bracketed by
+   STACKBLOCKS markers. */
+static void pp_StackBlocks ( XArray* sbs )
+{
+   Word ix, nBlocks = VG_(sizeXA)( sbs );
+   VG_(printf)("<<< STACKBLOCKS\n" );
+   for (ix = 0; ix < nBlocks; ix++) {
+      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, ix );
+      VG_(printf)("   ");
+      pp_StackBlock( sb );
+      VG_(printf)("\n");
+   }
+   VG_(printf)(">>> STACKBLOCKS\n" );
+}
+
+
+/* ---------- The StackBlock vector cache ---------- */
+
+static WordFM* /* XArray* of StackBlock -> nothing */
+       frameBlocks_set = NULL;
+
+/* Create the (initially empty) set of StackBlock vectors; must be
+   called exactly once (asserted). */
+static void init_StackBlocks_set ( void )
+{
+   tl_assert(!frameBlocks_set);
+   frameBlocks_set
+      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free, 
+                    (Word(*)(UWord,UWord))StackBlocks__cmp );
+   tl_assert(frameBlocks_set);
+}
+
+/* Find the given StackBlock-vector in our collection thereof.  If
+   found, deallocate the supplied one, and return the address of the
+   copy.  If not found, add the supplied one to our collection and
+   return its address. */
+static XArray* /* of StackBlock */
+       StackBlocks__find_and_dealloc__or_add
+          ( XArray* /* of StackBlock */ orig )
+{
+   UWord key, val;
+
+   /* First, normalise, as per comments above. */
+   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
+   VG_(sortXA)( orig );
+
+   /* Now get rid of any exact duplicates. */
+  nuke_dups:
+   /* Compacting pass: w is the write index, r the read index, nEQ
+      counts elements dropped for being equal to their successor. */
+   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
+     if (n >= 2) {
+        w = 0;
+        nEQ = 0;
+        for (r = 0; r < n; r++) {
+           if (r+1 < n) {
+              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
+              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
+              Word c = StackBlock__cmp(pR0,pR1);
+              tl_assert(c == -1 || c == 0);
+              if (c == 0) { nEQ++; continue; }
+           }
+           if (w != r) {
+              StackBlock* pW = VG_(indexXA)( orig, w );
+              StackBlock* pR = VG_(indexXA)( orig, r );
+              *pW = *pR;
+           }
+           w++;
+        }
+        tl_assert(r == n);
+        tl_assert(w + nEQ == n);
+        if (w < n) {
+           VG_(dropTailXA)( orig, n-w );
+        }
+        if (0) VG_(printf)("delta %ld\n", n-w);
+     }
+   }
+
+   /* Deal with the following strangeness, where two otherwise
+      identical blocks are claimed to have different sizes.  In which
+      case we use the larger size. */
+   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
+      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
+      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
+   */
+   { Word i, n = VG_(sizeXA)( orig );
+     if (n >= 2) {
+        for (i = 0; i < n-1; i++) {
+           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
+           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
+           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
+              /* They can't be identical because the previous tidying
+                 pass would have removed the duplicates.  And they
+                 can't be > because the earlier sorting pass would
+                 have ordered otherwise-identical descriptors
+                 according to < on .szB fields.  Hence: */
+              tl_assert(sb0->szB < sb1->szB);
+              sb0->szB = sb1->szB;
+              /* This makes the blocks identical, at the size of the
+                 larger one.  Rather than go to all the hassle of
+                 sliding the rest down, simply go back to the
+                 remove-duplicates stage.  The assertion guarantees
+                 that we eventually make progress, since the rm-dups
+                 stage will get rid of one of the blocks.  This is
+                 expected to happen only exceedingly rarely. */
+              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
+              goto nuke_dups;
+           }
+        }
+     }
+   }
+
+   /* A rather poor sanity check on the results: no two blocks may
+      share a .base value. */
+   { Word i, n = VG_(sizeXA)( orig );
+     for (i = 0; i < n-1; i++) {
+       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
+       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
+       if (sb1->base == sb2->base)
+          pp_StackBlocks(orig);
+       tl_assert(sb1->base != sb2->base);
+     }
+   }
+
+   /* Now, do we have it already? */
+   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
+      /* yes */
+      XArray* res;
+      tl_assert(val == 0);
+      tl_assert(key != (UWord)orig);
+      VG_(deleteXA)(orig);
+      res = (XArray*)key;
+      return res;
+   } else {
+      /* no */
+      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
+      return orig;
+   }
+}
+
+/* Top level function for getting the StackBlock vector for a given
+   instruction.  It is guaranteed that the returned pointer will be
+   valid for the entire rest of the run, and also that the addresses
+   of the individual elements of the array will not change. */
+
+/* Fetch the (arrays-only) stack blocks for 'ip' from m_debuginfo and
+   return the persistent, deduplicated copy. */
+static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
+{
+   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
+   tl_assert(blocks);
+   return StackBlocks__find_and_dealloc__or_add( blocks );
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// GlobalBlocks Persistent Cache                            //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Generate an arbitrary total ordering on GlobalBlocks. */
+/* Returns -1/0/1, giving a total order over all GlobalBlock fields. */
+static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
+{
+   Word r;
+   /* compare addrs */
+   if (gb1->addr < gb2->addr) return -1;
+   if (gb1->addr > gb2->addr) return 1;
+   /* compare sizes */
+   if (gb1->szB < gb2->szB) return -1;
+   if (gb1->szB > gb2->szB) return 1;
+   /* compare is/is-not array-typed flag */
+   if (gb1->isVec < gb2->isVec) return -1;
+   if (gb1->isVec > gb2->isVec) return 1;
+   /* compare the name */
+   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
+   if (r != 0) return r;
+   /* compare the soname */
+   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
+   return r;
+}
+
+static WordFM* /* GlobalBlock* -> nothing */
+       globalBlock_set = NULL;
+
+/* Create the (initially empty) set of persistent GlobalBlocks; must
+   be called exactly once (asserted). */
+static void init_GlobalBlock_set ( void )
+{
+   tl_assert(!globalBlock_set);
+   globalBlock_set
+      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free, 
+                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
+   tl_assert(globalBlock_set);
+}
+
+
+/* Top level function for making GlobalBlocks persistent.  Call here
+   with a non-persistent version, and the returned one is guaranteed
+   to be valid for the entire rest of the run.  The supplied one is
+   copied, not stored, so can be freed after the call. */
+
+/* Return a persistent copy of '*orig', cloning it into the set on
+   first sight.  'orig' itself is never stored and may be freed by
+   the caller afterwards. */
+static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
+{
+   UWord key, val;
+   if (!VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
+      /* Not seen before: clone it, store the clone and return the
+         clone's address. */
+      GlobalBlock* copy = sg_malloc( "di.sg_main.gpGB.1",
+                                     sizeof(GlobalBlock) );
+      tl_assert(copy);
+      *copy = *orig;
+      VG_(addToFM)( globalBlock_set, (UWord)copy, 0 );
+      return copy;
+   } else {
+      /* Seen before: hand back the stored copy. */
+      GlobalBlock* stored = (GlobalBlock*)key;
+      tl_assert(val == 0);
+      tl_assert(stored != orig);
+      return stored;
+   }
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Interval tree of StackTreeBlock                          //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
+   are not allowed.
+
+   A stack interval tree is a (WordFM StackTreeNode* void).  There is
+   one stack interval tree for each thread.
+*/
+typedef
+   struct {
+      Addr        addr;  /* start address of the interval */
+      SizeT       szB;   /* copied from .descr->szB */
+      StackBlock* descr; /* it's an instance of this block */
+      UWord       depth; /* depth of stack at time block was pushed */
+   }
+   StackTreeNode;
+
+/* Debug-print every interval in 'sitree'; 'who' tags the output so
+   the call site can be identified. */
+static void pp_StackTree ( WordFM* sitree, HChar* who )
+{
+   UWord keyW, valW;
+   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
+   VG_(initIterFM)( sitree );
+   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
+      StackTreeNode* nd = (StackTreeNode*)keyW;
+      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
+                  nd->descr, nd->descr->name, nd->descr->szB);
+   }
+   /* Fix: release the iterator when done, as GlobalTree__pp does;
+      previously the FM's iteration state was left open. */
+   VG_(doneIterFM)( sitree );
+   VG_(printf)(">>> END   pp_StackTree %s\n", who );
+}
+
+/* Interval comparison function for StackTreeNode */
+/* Interval comparison for StackTreeNode: defers to the generic
+   non-empty-interval comparator. */
+static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
+                                          StackTreeNode* sn2 )
+{
+   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
+                                 sn2->addr, sn2->szB);
+}
+
+/* Find the node holding 'a', if any. */
+/* Find the node whose interval holds address 'a', or NULL.  Probes
+   the tree with a 1-byte key interval; any overlap is a hit. */
+static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
+{
+   UWord keyW, valW;
+   StackTreeNode key;
+   tl_assert(sitree);
+   key.addr = a;
+   key.szB  = 1;
+   if (!VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key ))
+      return NULL;
+   tl_assert(valW == 0);
+   tl_assert((StackTreeNode*)keyW != &key);
+   return (StackTreeNode*)keyW;
+}
+
+/* Note that the supplied XArray of FrameBlock must have been
+   made persistent already. */
+__attribute__((noinline))
+static void add_blocks_to_StackTree (
+               /*MOD*/WordFM* sitree,
+               XArray* /* FrameBlock */ descrs,
+               XArray* /* Addr */ bases,
+               UWord depth
+            )
+{
+   /* Add one StackTreeNode per descr/base pair to 'sitree'.
+      'descrs' must already have been made persistent; 'bases' gives
+      each block's run-time address; 'depth' is the stack depth at
+      the time the blocks were pushed. */
+   Bool debug = (Bool)0;
+   Word i, nDescrs, nBases;
+
+   /* Fix: this statement previously ended with a stray comma
+      operator ("... ( descrs ),") instead of a semicolon. */
+   nDescrs = VG_(sizeXA)( descrs );
+   nBases = VG_(sizeXA)( bases );
+   tl_assert(nDescrs == nBases);
+
+   if (nDescrs == 0) return;
+
+   tl_assert(sitree);
+   if (debug) {
+      VG_(printf)("\n");
+      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
+   }
+
+   for (i = 0; i < nDescrs; i++) {
+      Bool already_present;
+      StackTreeNode* nyu;
+      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
+      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
+      tl_assert(descr->szB > 0);
+      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
+      nyu->addr  = addr;
+      nyu->szB   = descr->szB;
+      nyu->descr = descr;
+      nyu->depth = depth;
+      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
+      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
+      /* The interval can't already be there; else we have
+         overlapping stack blocks. */
+      tl_assert(!already_present);
+      if (debug) {
+         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
+      }
+   }
+   if (debug) {
+      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
+      VG_(printf)("\n");
+   }
+}
+
+/* Remove (and free) from 'sitree' the node for each address in
+   'bases'.  Every node must be present -- they were added when the
+   associated frame was created. */
+static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
+                                        XArray* /* Addr */ bases ) 
+{
+   UWord oldK, oldV;
+   Word i, nBases = VG_(sizeXA)( bases );
+   for (i = 0; i < nBases; i++) {
+      Bool b;
+      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
+      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
+      /* The interval must be there; we added it earlier when
+         the associated frame was created. */
+      tl_assert(nd);
+      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
+      /* we just found the block! */
+      tl_assert(b);
+      tl_assert(oldV == 0);
+      tl_assert(nd == (StackTreeNode*)oldK);
+      sg_free(nd);
+   }
+}
+
+
+/* Key finaliser for delete_StackTree: frees each StackTreeNode. */
+static void delete_StackTree__kFin ( UWord keyW ) {
+   StackTreeNode* nd = (StackTreeNode*)keyW;
+   tl_assert(nd);
+   sg_free(nd);
+}
+/* Value finaliser for delete_StackTree: values are always 0. */
+static void delete_StackTree__vFin ( UWord valW ) {
+   tl_assert(valW == 0);
+}
+/* Destroy a stack interval tree, freeing all its nodes. */
+static void delete_StackTree ( WordFM* sitree )
+{
+   VG_(deleteFM)( sitree,
+                 delete_StackTree__kFin, delete_StackTree__vFin );
+}
+
+/* Create an empty stack interval tree. */
+static WordFM* new_StackTree ( void ) {
+   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
+                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Interval tree of GlobalTreeBlock                         //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* A node in a global interval tree.  Zero length intervals 
+   (.szB == 0) are not allowed.
+
+   A global interval tree is a (WordFM GlobalTreeNode* void).  There
+   is one global interval tree for the entire process.
+*/
+typedef
+   struct {
+      Addr         addr; /* copied from .descr->addr */
+      SizeT        szB; /* copied from .descr->szB */
+      GlobalBlock* descr; /* it's this block */
+   }
+   GlobalTreeNode;
+
+/* Print one GlobalTreeNode, without a trailing newline.  Fix: szB
+   is a SizeT (unsigned), so print it with %lu rather than %ld,
+   matching pp_StackTree. */
+static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
+   tl_assert(nd->descr);
+   VG_(printf)("GTNode [%#lx,+%lu) %s", 
+               nd->addr, nd->szB, nd->descr->name);
+}
+
+/* Debug-print every node of a global interval tree; 'who' tags the
+   output so the call site can be identified.  Values in the tree are
+   always zero (keys carry all the data). */
+static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
+                             HChar* who )
+{
+   UWord keyW, valW;
+   GlobalTreeNode* nd;
+   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
+   VG_(initIterFM)( gitree );
+   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
+      tl_assert(valW == 0);
+      nd = (GlobalTreeNode*)keyW;
+      VG_(printf)("  ");
+      GlobalTreeNode__pp(nd);
+      VG_(printf)("\n");
+   }
+   VG_(doneIterFM)( gitree );
+   VG_(printf)(">>>\n");
+}
+
+/* Interval comparison function for GlobalTreeNode */
+/* Interval comparison function for GlobalTreeNode: delegates to the
+   generic nonempty-interval comparator. */
+static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
+                                           GlobalTreeNode* gn2 )
+{
+   Word r = cmp_nonempty_intervals( gn1->addr, gn1->szB,
+                                    gn2->addr, gn2->szB );
+   return r;
+}
+
+/* Find the node holding 'a', if any. */
+/* Find the node holding 'a', if any. */
+static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
+{
+   UWord keyW, valW;
+   GlobalTreeNode key;
+   GlobalTreeNode* found;
+   /* Probe with a 1-byte interval at 'a'; any node whose interval
+      contains 'a' compares equal to it. */
+   key.addr = a;
+   key.szB  = 1;
+   if (!VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key ))
+      return NULL;
+   found = (GlobalTreeNode*)keyW;
+   tl_assert(valW == 0);
+   tl_assert(found != &key);
+   return found;
+}
+
+/* Note that the supplied GlobalBlock must have been made persistent
+   already. */
+/* Add a persistent GlobalBlock 'descr' to the process-wide global
+   interval tree.  Exact duplicates (same addr/szB/soname) are
+   detected and silently dropped; any other overlap is a fatal
+   error (asserted below). */
+static void add_block_to_GlobalTree (
+               /*MOD*/WordFM* gitree,
+               GlobalBlock* descr
+            )
+{
+   Bool already_present;
+   GlobalTreeNode *nyu, *nd;
+   UWord keyW, valW;
+
+   tl_assert(descr->szB > 0);
+   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
+   nyu->addr  = descr->addr;
+   nyu->szB   = descr->szB;
+   nyu->descr = descr;
+
+   /* Basically it's an error to add a global block to the tree that
+      is already in the tree.  However, detect and ignore attempts to
+      insert exact duplicates; they do appear for some reason
+      (possible a bug in m_debuginfo?) */
+   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
+   if (already_present) {
+      tl_assert(valW == 0);
+      nd = (GlobalTreeNode*)keyW;
+      tl_assert(nd);
+      tl_assert(nd != nyu);
+      tl_assert(nd->descr);
+      tl_assert(nyu->descr);
+      if (nd->addr == nyu->addr && nd->szB == nyu->szB
+          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
+          /* Although it seems reasonable to demand that duplicate
+             blocks have identical names, that is too strict.  For
+             example, reading debuginfo from glibc produces two
+             otherwise identical blocks with names "tzname" and
+             "__tzname".  A constraint of the form "must be identical,
+             or one must be a substring of the other" would fix that.
+             However, such trickery is scuppered by the fact that we
+             truncate all variable names to 15 characters to make
+             storage management simpler, hence giving pairs like
+             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
+             it's simplest just to skip the name comparison
+             completely. */
+          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
+         /* exact duplicate; ignore it */
+         sg_free(nyu);
+         return;
+      }
+      /* else fall through; the assertion below will catch it */
+   }
+
+   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
+   /* The interval can't already be there; else we have
+      overlapping global blocks. */
+   if (already_present) {
+      GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" );
+      VG_(printf)("Overlapping block: ");
+      GlobalTreeNode__pp(nyu);
+      VG_(printf)("\n");
+   }
+   tl_assert(!already_present);
+}
+
+/* Delete every node in 'gitree' whose interval intersects
+   [a, a+szB).  Returns True iff at least one node was removed. */
+static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
+                                   Addr a, SizeT szB )
+{
+   /* One easy way to do this: look up [a,a+szB) in the tree.  That
+      will either succeed, producing a block which intersects that
+      range, in which case we delete it and repeat; or it will fail,
+      in which case there are no blocks intersecting the range, and we
+      can bring the process to a halt. */
+   UWord keyW, valW, oldK, oldV;
+   GlobalTreeNode key, *nd;
+   Bool b, anyFound;
+
+   tl_assert(szB > 0);
+
+   anyFound = False;
+
+   key.addr = a;
+   key.szB  = szB;
+
+   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
+      anyFound = True;
+      nd = (GlobalTreeNode*)keyW;
+      tl_assert(valW == 0);
+      tl_assert(nd != &key);
+      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
+
+      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
+      tl_assert(b);
+      tl_assert(oldV == 0);
+      tl_assert(oldK == keyW); /* check we deleted the node we just found */
+   }
+
+   return anyFound;
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Invar                                                    //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* An invariant, as resulting from watching the destination of a
+   memory referencing instruction.  Initially is Inv_Unset until the
+   instruction makes a first access. */
+
+/* Discriminant for Invar (see comment above).  Starts at 1 so that
+   zeroed memory does not alias a valid tag. */
+typedef
+   enum {
+      Inv_Unset=1,  /* not established yet */
+      Inv_Unknown,  /* unknown location */
+      Inv_Stack0,   /* array-typed stack block in innermost frame */
+      Inv_StackN,   /* array-typed stack block in non-innermost frame */
+      Inv_Global,   /* array-typed global block */
+   }
+   InvarTag;
+
+/* Tagged union describing the block (if any) that an instruction's
+   memory accesses have been observed to fall in.  Only the variant
+   selected by .tag is meaningful. */
+typedef
+   struct {
+      InvarTag tag;
+      union {
+         struct {
+         } Unset;
+         struct {
+         } Unknown;
+         struct {
+            Addr  addr;
+            SizeT szB;
+            StackBlock* descr;
+         } Stack0; /* innermost stack frame */
+         struct {
+            /* Pointer to a node in the interval tree for
+              this thread. */
+            StackTreeNode* nd;
+         } StackN; /* non-innermost stack frame */
+         struct {
+           /* Pointer to a GlobalBlock in the interval tree of
+              global blocks. */
+           GlobalTreeNode* nd;
+         } Global;
+      }
+      Inv;
+   }
+   Invar;
+
+/* Partial debugging printing for an Invar. */
+/* Debugging only: print the address-range data of 'i' (no descr
+   names).  szB fields are SizeT, hence the %lu specifiers. */
+static void pp_Invar ( Invar* i )
+{
+   switch (i->tag) {
+      case Inv_Unset: 
+         VG_(printf)("Unset");
+         break;
+      case Inv_Unknown:
+         VG_(printf)("Unknown");
+         break;
+      case Inv_Stack0:
+         VG_(printf)("Stack0 [%#lx,+%lu)",
+                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
+         break;
+      case Inv_StackN:
+         VG_(printf)("StackN [%#lx,+%lu)",
+                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
+         break;
+      case Inv_Global:
+         VG_(printf)("Global [%#lx,+%lu)",
+                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
+         break;
+      default:
+         tl_assert(0);
+   }
+}
+
+/* Compare two Invars for equality. */
+/* Compare two Invars for equality.  Neither may be Inv_Unset.
+   Stack0 compares by address range; StackN/Global compare by
+   tree-node identity. */
+static Bool eq_Invar ( Invar* i1, Invar* i2 )
+{
+   tl_assert(i1->tag != Inv_Unset);
+   tl_assert(i2->tag != Inv_Unset);
+   if (i1->tag != i2->tag)
+      return False;
+   if (i1->tag == Inv_Unknown)
+      return True;
+   if (i1->tag == Inv_Stack0)
+      return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
+             && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
+   if (i1->tag == Inv_StackN)
+      return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
+   if (i1->tag == Inv_Global)
+      return i1->Inv.Global.nd == i2->Inv.Global.nd;
+   /* No other tags exist. */
+   tl_assert(0);
+   /*NOTREACHED*/
+   return False;
+}
+
+/* Print selected parts of an Invar, suitable for use in error
+   messages. */
+/* Render 'inv' into 'buf' (which must hold at least 96 bytes) for
+   use in error messages.  'depth' is the current frame depth, used
+   to express StackN locations relative to here. */
+static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
+{
+   HChar* str;
+   tl_assert(nBuf >= 96);
+   buf[0] = 0;
+   switch (inv->tag) {
+      case Inv_Unknown:
+         VG_(sprintf)(buf, "%s", "unknown");
+         break;
+      case Inv_Stack0:
+         str = "array";
+         VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
+                      str, inv->Inv.Stack0.descr->name );
+         break;
+      case Inv_StackN:
+         str = "array";
+         /* NOTE(review): 'depth - nd->depth' is a signed Word printed
+            with %lu; assumed non-negative for non-innermost frames --
+            confirm. */
+         VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
+                      str, inv->Inv.StackN.nd->descr->name,
+                           depth - inv->Inv.StackN.nd->depth );
+         break;
+      case Inv_Global:
+         str = "array";
+         VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
+                      str, inv->Inv.Global.nd->descr->name,
+                           inv->Inv.Global.nd->descr->soname );
+         break;
+      case Inv_Unset:
+         VG_(sprintf)(buf, "%s", "Unset!");
+         break;
+      default:
+         tl_assert(0);
+   }
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// our globals                                              //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////
+///
+
+/* Capacity of each per-thread query cache. */
+#define N_QCACHE 16
+
+/* Powers of two only, else the result will be chaos */
+#define QCACHE_ADVANCE_EVERY 16
+
+/* Per-thread query cache.  Note that the invar can only be Inv_StackN
+   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
+typedef
+   struct {
+      Addr  addr; /* start of the cached address range */
+      SizeT szB; /* length of the cached address range */
+      Invar inv; /* invariant applying to that whole range */
+   }
+   QCElem;
+
+typedef
+   struct {
+      Word   nInUse; /* number of valid entries in elems[0 .. nInUse-1] */
+      QCElem elems[N_QCACHE];
+   }
+   QCache;
+
+/* Empty the cache; nInUse must never have gone negative. */
+static void QCache__invalidate ( QCache* qc ) {
+   tl_assert(qc->nInUse >= 0);
+   qc->nInUse = 0;
+}
+
+/* Debug-print the in-use cache entries; 'who' tags the output so the
+   call site can be identified. */
+static void QCache__pp ( QCache* qc, HChar* who )
+{
+   Word i;
+   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
+   for (i = 0; i < qc->nInUse; i++) {
+      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
+      pp_Invar(&qc->elems[i].inv);
+      VG_(printf)("\n");
+   }
+   VG_(printf)(">>>\n");
+}
+
+/* Query-cache statistics, reported at shutdown. */
+static ULong stats__qcache_queries = 0;
+static ULong stats__qcache_misses  = 0;
+static ULong stats__qcache_probes  = 0;
+
+///
+//////////////////////////////////////////////////////////////
+
+/* Each thread has:
+   * a shadow stack of StackFrames, which is a double-linked list
+   * an stack block interval tree
+*/
+/* Innermost frame of each thread's shadow stack (doubly-linked). */
+static  struct _StackFrame*          shadowStacks[VG_N_THREADS];
+
+/* Per-thread stack-block interval tree. */
+static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];
+
+/* Per-thread query cache. */
+static  QCache                       qcaches[VG_N_THREADS];
+
+
+/* Additionally, there is one global variable interval tree
+   for the entire process.
+*/
+static WordFM* /* GlobalTreeNode */ giTree;
+
+
+/* Flush the query cache of every thread. */
+static void invalidate_all_QCaches ( void )
+{
+   Word tid;
+   for (tid = 0; tid < VG_N_THREADS; tid++)
+      QCache__invalidate( &qcaches[tid] );
+}
+
+/* One-time initialisation of the per-thread state and the
+   process-wide global interval tree declared above. */
+static void ourGlobals_init ( void )
+{
+   Word i;
+   for (i = 0; i < VG_N_THREADS; i++) {
+      shadowStacks[i] = NULL;
+      siTrees[i] = NULL;
+   }
+   invalidate_all_QCaches();
+   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free, 
+                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Handle global variable load/unload events                //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Fetch the global (array-typed only) variable descriptions
+   associated with debuginfo handle 'di_handle', make a persistent
+   copy of each, and add them to the process-wide global tree. */
+static void acquire_globals ( ULong di_handle )
+{
+   Word n, i;
+   XArray* /* of GlobalBlock */ gbs;
+   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
+   gbs = VG_(di_get_global_blocks_from_dihandle)
+            (di_handle, True/*arrays only*/);
+   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));
+
+   n = VG_(sizeXA)( gbs );
+   for (i = 0; i < n; i++) {
+      GlobalBlock* gbp;
+      GlobalBlock* gb = VG_(indexXA)( gbs, i );
+      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n", 
+                         gb->szB, gb->addr, gb->soname, gb->name );
+      tl_assert(gb->szB > 0);
+      /* Make a persistent copy of each GlobalBlock, and add it
+         to the tree. */
+      gbp = get_persistent_GlobalBlock( gb );
+      add_block_to_GlobalTree( giTree, gbp );
+   }
+
+   VG_(deleteXA)( gbs );
+}
+
+
+/* We only intercept these two because we need to see any di_handles
+   that might arise from the mappings/allocations. */
+/* mmap event handler: only the di_handle matters to us. */
+void sg_new_mem_mmap( Addr a, SizeT len,
+                      Bool rr, Bool ww, Bool xx, ULong di_handle )
+{
+   /* A zero handle means no debuginfo came with this mapping,
+      hence no globals to acquire. */
+   if (di_handle == 0)
+      return;
+   acquire_globals(di_handle);
+}
+/* startup-mapping event handler: identical treatment to mmap. */
+void sg_new_mem_startup( Addr a, SizeT len,
+                         Bool rr, Bool ww, Bool xx, ULong di_handle )
+{
+   /* A zero handle means no debuginfo came with this mapping,
+      hence no globals to acquire. */
+   if (di_handle == 0)
+      return;
+   acquire_globals(di_handle);
+}
+/* munmap event handler: remove any global blocks in [a, a+len) from
+   the global tree, then invalidate all Invars and caches that could
+   still refer to them. */
+void sg_die_mem_munmap ( Addr a, SizeT len )
+{
+   Bool debug = (Bool)0;
+   Bool overlap = False;
+
+   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
+
+   if (len == 0)
+      return;
+
+   overlap = del_GlobalTree_range(giTree, a, len);
+
+   { /* redundant sanity check */
+     UWord keyW, valW;
+     VG_(initIterFM)( giTree );
+     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
+       GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
+        tl_assert(valW == 0);
+        tl_assert(nd->szB > 0);
+        /* no surviving node may intersect the unmapped range */
+        tl_assert(nd->addr + nd->szB <= a
+                  || a + len <= nd->addr);
+     }
+     VG_(doneIterFM)( giTree );
+   }
+
+   if (!overlap)
+      return;
+
+   /* Ok, the range contained some blocks.  Therefore we'll need to
+      visit all the Invars in all the thread shadow stacks, and
+      convert all Inv_Global{S,V} entries that intersect [a,a+len) to
+      Inv_Unknown. */
+   tl_assert(len > 0);
+   preen_Invars( a, len, False/*!isHeap*/ );
+   invalidate_all_QCaches();
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// StackFrame                                               //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Access-classification and hash-table statistics, reported at
+   shutdown. */
+static ULong stats__total_accesses   = 0;
+static ULong stats__classify_Stack0  = 0;
+static ULong stats__classify_StackN  = 0;
+static ULong stats__classify_Global  = 0;
+static ULong stats__classify_Unknown = 0;
+static ULong stats__Invars_preened   = 0;
+static ULong stats__Invars_changed   = 0;
+static ULong stats__t_i_b_empty      = 0;
+static ULong stats__htab_fast        = 0;
+static ULong stats__htab_searches    = 0;
+static ULong stats__htab_probes      = 0;
+static ULong stats__htab_resizes     = 0;
+
+
+/* A dynamic instance of an instruction */
+typedef
+   struct {
+      /* IMMUTABLE */
+      Addr    insn_addr; /* NB! zero means 'not in use' */
+      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
+      /* MUTABLE */
+      Invar invar; /* current invariant observed for this instruction */
+   }
+   IInstance;
+
+
+/* Initial (and minimum) capacity of a frame's inline IInstance hash
+   table; see htab/htab_fixed comments below. */
+#define N_HTAB_FIXED 64
+
+typedef
+   struct _StackFrame {
+      /* The sp when the frame was created, so we know when to get rid
+         of it. */
+      Addr creation_sp;
+      /* The stack frames for a thread are arranged as a doubly linked
+         list.  Obviously the outermost frame in the stack has .outer
+         as NULL and the innermost in theory has .inner as NULL.
+         However, when a function returns, we don't delete the
+         just-vacated StackFrame.  Instead, it is retained in the list
+         and will be re-used when the next call happens.  This is so
+         as to avoid constantly having to dynamically allocate and
+         deallocate frames. */
+      struct _StackFrame* inner;
+      struct _StackFrame* outer;
+      Word depth; /* 0 for outermost; increases inwards */
+      /* Information for each memory referencing instruction, for this
+         instantiation of the function.  The iinstances array is
+         operated as a simple linear-probe hash table, which is
+         dynamically expanded as necessary.  Once critical thing is
+         that an IInstance with a .insn_addr of zero is interpreted to
+         mean that hash table slot is unused.  This means we can't
+         store an IInstance for address zero. */
+      /* Note that htab initially points to htab_fixed.  If htab_fixed
+         turns out not to be big enough then htab is made to point to
+         dynamically allocated memory.  But it's often the case that
+         htab_fixed is big enough, so this optimisation saves a huge
+         number of sg_malloc/sg_free call pairs. */
+      IInstance* htab;
+      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
+      UWord      htab_used; /* number of hash table slots currently in use */
+      /* If this frame is currently making a call, then the following
+         are relevant. */
+      Addr sp_at_call;
+      Addr fp_at_call;
+      XArray* /* of Addr */ blocks_added_by_call;
+      /* See comment just above */
+      IInstance htab_fixed[N_HTAB_FIXED];
+   }
+   StackFrame;
+
+
+
+
+
+/* Move this somewhere else? */
+/* Visit all Invars in the entire system.  If 'isHeap' is True, change
+   all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown.  If
+   'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
+   instead. */
+
+/* Downgrade 'inv' to Inv_Unknown if it refers to a block
+   intersecting [a, a+len).  NOTE(review): the whole body is
+   currently '#if 0'-ed out, so the switch has only the default arm
+   and any call asserts; the tags named inside the dead code
+   (Inv_Heap, Inv_GlobalS, ...) predate the current InvarTag. */
+__attribute__((noinline))
+static void preen_Invar ( Invar* inv, Addr a, SizeT len, Bool isHeap )
+{
+   stats__Invars_preened++;
+   tl_assert(len > 0);
+   tl_assert(inv);
+   switch (inv->tag) {
+#if 0
+      case Inv_Heap:
+         tl_assert(inv->Inv.Heap.len > 0);
+         if (isHeap && rangesOverlap(a, len, inv->Inv.Heap.start,
+                                             inv->Inv.Heap.len)) {
+            inv->tag = Inv_Unknown;
+            stats__Invars_changed++;
+         }
+         break;
+      case Inv_GlobalS:
+      case Inv_GlobalV:
+         tl_assert(inv->Inv.Global.len > 0);
+         if ((!isHeap)
+             && rangesOverlap(a, len, inv->Inv.Global.start,
+                                      inv->Inv.Global.len)) {
+            inv->tag = Inv_Unknown;
+            stats__Invars_changed++;
+         }
+         break;
+      case Inv_StackS:
+      case Inv_StackV:
+      case Inv_Unknown:
+         break;
+#endif
+      default: tl_assert(0);
+   }
+}
+
+/* Walk every IInstance in every frame of every thread and preen its
+   Invar against [a, a+len).  NOTE(review): currently disabled -- the
+   leading tl_assert(0) fires on any call, and the '#if 0' body still
+   assumes the old XArray-based shadow stack representation. */
+__attribute__((noinline))
+static void preen_Invars ( Addr a, SizeT len, Bool isHeap )
+{
+  tl_assert(0);
+#if 0
+   Int         i;
+   Word        ixFrames, nFrames;
+   UWord       u;
+   XArray*     stack; /* XArray* of StackFrame */
+   StackFrame* frame;
+   tl_assert(len > 0);
+   tl_assert(0);
+   for (i = 0; i < VG_N_THREADS; i++) {
+tl_assert(0);
+      stack = shadowStacks[i];
+      if (!stack)
+         continue;
+      nFrames = VG_(sizeXA)( stack );
+      for (ixFrames = 0; ixFrames < nFrames; ixFrames++) {
+         UWord xx = 0; /* sanity check only; count of used htab entries */
+         frame = VG_(indexXA)( stack, ixFrames );
+         tl_assert(frame->htab);
+         for (u = 0; u < frame->htab_size; u++) {
+            IInstance* ii = &frame->htab[u];
+            if (ii->insn_addr == 0)
+               continue; /* not in use */
+            preen_Invar( &ii->invar, a, len, isHeap );
+            xx++;           
+         }
+         tl_assert(xx == frame->htab_used);
+      }
+   }
+#endif
+}
+
+
+/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
+   of the ip are guaranteed to be zero */
+inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
+   /* htab_size is always a power of 2, so this keeps the low
+      log2(htab_size) bits of the insn address. */
+   UWord mask = htab_size - 1;
+   return ip & mask;
+}
+
+/* Set up a frame's IInstance hash table in its initial state:
+   pointing at the inline fixed-size array, with every slot marked
+   unused (insn_addr == 0). */
+__attribute__((noinline))
+static void initialise_II_hash_table ( StackFrame* sf )
+{
+   UWord slot;
+   sf->htab      = &sf->htab_fixed[0];
+   sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
+   tl_assert(sf->htab);
+   sf->htab_used = 0;
+   for (slot = 0; slot < sf->htab_size; slot++)
+      sf->htab[slot].insn_addr = 0; /* NOT IN USE */
+}
+
+
+/* Double the capacity of sf's IInstance hash table, rehashing every
+   in-use entry into the new table.  The fixed inline table is never
+   freed; a previously heap-allocated table is. */
+__attribute__((noinline))
+static void resize_II_hash_table ( StackFrame* sf )
+{
+   UWord     i, j, ix, old_size, new_size;
+   IInstance *old_htab, *new_htab, *old;
+
+   tl_assert(sf && sf->htab);
+   old_size = sf->htab_size;
+   new_size = 2 * old_size;
+   old_htab = sf->htab;
+   new_htab = sg_malloc( "di.sg_main.rIht.1",
+                         new_size * sizeof(IInstance) );
+   for (i = 0; i < new_size; i++) {
+      new_htab[i].insn_addr = 0; /* NOT IN USE */
+   }
+   for (i = 0; i < old_size; i++) {
+      old = &old_htab[i];
+      if (old->insn_addr == 0 /* NOT IN USE */)
+         continue;
+      ix = compute_II_hash(old->insn_addr, new_size);
+      /* find out where to put this, in the new table */
+      j = new_size;
+      while (1) {
+         if (new_htab[ix].insn_addr == 0)
+            break;
+         /* This can't ever happen, because it would mean the new
+            table is full; that isn't allowed -- even the old table is
+            only allowed to become half full. */
+         tl_assert(j > 0);
+         j--;
+         ix++; if (ix == new_size) ix = 0;
+      }
+      /* copy the old entry to this location */
+      tl_assert(ix < new_size);
+      tl_assert(new_htab[ix].insn_addr == 0);
+      new_htab[ix] = *old;
+      tl_assert(new_htab[ix].insn_addr != 0);
+   }
+   /* all entries copied; free old table. */
+   if (old_htab != &sf->htab_fixed[0])
+      sg_free(old_htab);
+   sf->htab = new_htab;
+   sf->htab_size = new_size;
+   /* check sf->htab_used is correct.  Optional and a bit expensive
+      but anyway: */
+   j = 0;
+   for (i = 0; i < new_size; i++) {
+      if (new_htab[i].insn_addr != 0) {
+         j++;
+      }
+   }
+   tl_assert(j == sf->htab_used);
+   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
+}
+
+
+/* Slow path of find_or_create_IInstance: linear-probe from the hash
+   slot for 'ip', resizing first if the table is half full.  Returns
+   the existing entry for 'ip', or initialises and returns a fresh
+   one (invar starts as Inv_Unset). */
+__attribute__((noinline))
+static IInstance* find_or_create_IInstance_SLOW (
+                     StackFrame* sf, 
+                     Addr ip,
+                     XArray* /* StackBlock */ ip_frameblocks
+                  )
+{
+   UWord i, ix;
+
+   stats__htab_searches++;
+
+   tl_assert(sf);
+   tl_assert(sf->htab);
+
+   /* Make sure the table loading doesn't get too high. */
+   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
+      stats__htab_resizes++;
+      resize_II_hash_table(sf);
+   }
+   tl_assert(2 * sf->htab_used <= sf->htab_size);
+  
+   ix = compute_II_hash(ip, sf->htab_size);
+   i = sf->htab_size;
+   while (1) {
+      stats__htab_probes++;
+      /* Note that because of the way the fast-case handler works,
+         these two tests are actually redundant in the first iteration
+         of this loop.  (Except they aren't redundant if the code just
+         above resized the table first. :-) */
+      if (sf->htab[ix].insn_addr == ip)
+         return &sf->htab[ix];
+      if (sf->htab[ix].insn_addr == 0)
+         break;
+      /* If i ever gets to zero and we have found neither what we're
+         looking for nor an empty slot, the table must be full.  Which
+         isn't possible -- we monitor the load factor to ensure it
+         doesn't get above say 50%; if that ever does happen the table
+         is resized. */
+      tl_assert(i > 0);
+      i--;
+      ix++;
+      if (ix == sf->htab_size) ix = 0;
+   }
+
+   /* So now we've found a free slot at ix, and we can use that. */
+   tl_assert(sf->htab[ix].insn_addr == 0);
+
+   /* Add a new record in this slot. */
+   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
+   sf->htab[ix].insn_addr = ip;
+   sf->htab[ix].blocks    = ip_frameblocks;
+   sf->htab[ix].invar.tag = Inv_Unset;
+   sf->htab_used++;
+   return &sf->htab[ix];
+}
+
+
+/* Fast path: look up (or create) the IInstance for 'ip' in sf's
+   table, handling only the first-probe hit/empty cases inline and
+   deferring everything else to the SLOW variant above. */
+inline
+static IInstance* find_or_create_IInstance (
+                     StackFrame* sf, 
+                     Addr ip,
+                     XArray* /* StackBlock */ ip_frameblocks
+                  )
+{
+   UWord ix = compute_II_hash(ip, sf->htab_size);
+   /* Is it in the first slot we come to? */
+   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
+      stats__htab_fast++;
+      return &sf->htab[ix];
+   }
+   /* If the first slot we come to is empty, bag it. */
+   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
+      stats__htab_fast++;
+      tl_assert(ip != 0);
+      sf->htab[ix].insn_addr = ip;
+      sf->htab[ix].blocks    = ip_frameblocks;
+      sf->htab[ix].invar.tag = Inv_Unset;
+      sf->htab_used++;
+      return &sf->htab[ix];
+   }
+   /* Otherwise we hand off to the slow case, which searches other
+      slots, and optionally resizes the table if necessary. */
+   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
+}
+
+
+/* Effective address of a stack block: its base offset plus whichever
+   of SP/FP the block is declared relative to (descr->spRel). */
+__attribute__((noinline))
+static Addr calculate_StackBlock_EA ( StackBlock* descr,
+                                      Addr sp, Addr fp ) {
+   UWord base = (UWord)descr->base;
+   UWord reg  = descr->spRel ? (UWord)sp : (UWord)fp;
+   return (Addr)(base + reg);
+}
+
+/* Given an array of StackBlocks, return an array of Addrs, holding
+   their effective addresses.  Caller deallocates result array. */
+/* Given an array of StackBlocks, build and return a parallel XArray
+   of their effective addresses.  Caller deallocates the result. */
+__attribute__((noinline))
+static XArray* /* Addr */ calculate_StackBlock_EAs (
+                             XArray* /* StackBlock */ blocks,
+                             Addr sp, Addr fp
+                          )
+{
+   Word    ix;
+   Word    nBlocks = VG_(sizeXA)( blocks );
+   XArray* eas;
+   tl_assert(nBlocks > 0);
+   eas = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
+   for (ix = 0; ix < nBlocks; ix++) {
+      StackBlock* descr = VG_(indexXA)( blocks, ix );
+      Addr        ea    = calculate_StackBlock_EA( descr, sp, fp );
+      VG_(addToXA)( eas, &ea );
+   }
+   return eas;
+}
+
+
+/* Try to classify the block into which a memory access falls, and
+   write the result in 'inv'.  This writes all relevant fields of
+   'inv'. */
+__attribute__((noinline)) 
+static void classify_address ( /*OUT*/Invar* inv,
+                               ThreadId tid,
+                               Addr ea, Addr sp, Addr fp,
+                               UWord szB,
+                               XArray* /* of StackBlock */ thisInstrBlocks )
+{
+   tl_assert(szB > 0);
+   /* First, look in the stack blocks accessible in this instruction's
+      frame. */
+   { 
+     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
+     if (nBlocks == 0) stats__t_i_b_empty++;
+     for (i = 0; i < nBlocks; i++) {
+        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
+        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
+        if (bea <= ea && ea + szB <= bea + descr->szB) {
+           /* found it */
+           inv->tag = Inv_Stack0;
+           inv->Inv.Stack0.addr  = bea;
+           inv->Inv.Stack0.szB   = descr->szB;
+           inv->Inv.Stack0.descr = descr;
+           stats__classify_Stack0++;
+           return;
+        }
+     }
+   }
+   /* Look in this thread's query cache */
+   { Word i;
+     QCache* cache = &qcaches[tid];
+     static UWord ctr = 0;
+     stats__qcache_queries++;
+     for (i = 0; i < cache->nInUse; i++) {
+        if (0) /* expensive in a loop like this */
+               tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
+        stats__qcache_probes++;
+        if (is_subinterval_of(cache->elems[i].addr,
+                              cache->elems[i].szB, ea, szB)) {
+           if (i > 0
+               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
+              QCElem tmp;
+              tmp = cache->elems[i-1];
+              cache->elems[i-1] = cache->elems[i];
+              cache->elems[i] = tmp;
+              i--;
+           }
+           *inv = cache->elems[i].inv;
+           return;
+        }
+     }
+     stats__qcache_misses++;
+   }
+   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
+      in some calling frame?  Consult this thread's stack-block
+      interval tree to find out. */
+   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
+     /* We know that [ea,ea+1) is in the block, but we need to
+        restrict to the case where the whole access falls within
+        it. */
+     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
+        nd = NULL;
+     }
+     if (nd) {
+        /* found it */
+        inv->tag = Inv_StackN;
+        inv->Inv.StackN.nd = nd;
+        stats__classify_StackN++;
+        goto out;
+     }
+   }
+   /* Not in a stack block.  Try the global pool. */
+   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
+     /* We know that [ea,ea+1) is in the block, but we need to
+        restrict to the case where the whole access falls within
+        it. */
+     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
+        nd = NULL;
+     }
+     if (nd) {
+        /* found it */
+        inv->tag = Inv_Global;
+        inv->Inv.Global.nd = nd;
+        stats__classify_Global++;
+        goto out;
+     }
+   }
+   /* No idea - give up. */
+   inv->tag = Inv_Unknown;
+   stats__classify_Unknown++;
+
+   /* Update the cache */
+  out:
+   { Addr    toadd_addr = 0;
+     SizeT   toadd_szB  = 0;
+     QCache* cache      = &qcaches[tid];
+
+     static UWord ctr = 0;
+     Bool show = False;
+     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
+
+     if (show) QCache__pp(cache, "before upd");
+
+     switch (inv->tag) {
+        case Inv_Global:
+           toadd_addr = inv->Inv.Global.nd->addr;
+           toadd_szB  = inv->Inv.Global.nd->szB;
+           break;
+        case Inv_StackN:
+           toadd_addr = inv->Inv.StackN.nd->addr;
+           toadd_szB  = inv->Inv.StackN.nd->szB;
+           break;
+        case Inv_Unknown: {
+           /* This is more complex.  We need to figure out the
+              intersection of the "holes" in the global and stack
+              interval trees into which [ea,ea+szB) falls.  This is
+              further complicated by the fact that [ea,ea+szB) might
+              not fall cleanly into a hole; it may instead fall across
+              the boundary of a stack or global block.  In that case
+              we just ignore it and don't update the cache, since we
+              have no way to represent this situation precisely. */
+           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
+           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
+           Addr gMin, gMax, sMin, sMax, uMin, uMax;
+           Bool sOK, gOK;
+           sNegInf.addr = 0;
+           sNegInf.szB  = 1;
+           sPosInf.addr = ~(UWord)0;
+           sPosInf.szB  = 1;
+           gNegInf.addr = 0;
+           gNegInf.szB  = 1;
+           gPosInf.addr = ~(UWord)0;
+           gPosInf.szB  = 1;
+           sKey.addr = ea;
+           sKey.szB  = szB;
+           gKey.addr = ea;
+           gKey.szB  = szB;
+           if (0) VG_(printf)("Tree sizes %ld %ld\n",
+                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
+           sOK = VG_(findBoundsFM)( siTrees[tid], 
+                                    (UWord*)&sLB, (UWord*)&sUB,
+                                    (UWord)&sNegInf, (UWord)&sPosInf,
+                                    (UWord)&sKey );
+           gOK = VG_(findBoundsFM)( giTree,
+                                    (UWord*)&gLB, (UWord*)&gUB,
+                                    (UWord)&gNegInf, (UWord)&gPosInf,
+                                    (UWord)&gKey );
+           if (!(sOK && gOK)) {
+              /* If this happens, then [ea,ea+szB) partially overlaps
+                 a heap or stack block.  We can't represent that, so
+                 just forget it (should be very rare).  However, do
+                 maximum sanity checks first.  In such a
+                 partial overlap case, it can't be the case that both
+                 [ea] and [ea+szB-1] overlap the same block, since if
+                 that were indeed the case then it wouldn't be a
+                 partial overlap; rather it would simply fall inside
+                 that block entirely and we shouldn't be inside this
+                 conditional at all. */
+              if (!sOK) {
+                 StackTreeNode *ndFirst, *ndLast;
+                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
+                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
+                 /* if both ends of the range fall inside a block,
+                    they can't be in the same block. */
+                 if (ndFirst && ndLast)
+                    tl_assert(ndFirst != ndLast);
+                 /* for each end of the range, if it is in a block,
+                    the range as a whole can't be entirely within the
+                    block. */
+                 if (ndFirst)
+                    tl_assert(!is_subinterval_of(ndFirst->addr,
+                                                 ndFirst->szB, ea, szB));
+                 if (ndLast)
+                    tl_assert(!is_subinterval_of(ndLast->addr,
+                                                 ndLast->szB, ea, szB));
+              }
+              if (!gOK) {
+                 GlobalTreeNode *ndFirst, *ndLast;
+                 ndFirst = find_GlobalTreeNode( giTree, ea );
+                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
+                 /* if both ends of the range fall inside a block,
+                    they can't be in the same block. */
+                 if (ndFirst && ndLast)
+                    tl_assert(ndFirst != ndLast);
+                 /* for each end of the range, if it is in a block,
+                    the range as a whole can't be entirely within the
+                    block. */
+                 if (ndFirst)
+                    tl_assert(!is_subinterval_of(ndFirst->addr,
+                                                 ndFirst->szB, ea, szB));
+                 if (ndLast)
+                    tl_assert(!is_subinterval_of(ndLast->addr,
+                                                 ndLast->szB, ea, szB));
+              }
+              if (0) VG_(printf)("overlapping blocks in cache\n");
+              return;
+           }
+           sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
+           sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
+           gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
+           gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
+           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
+                              sMin, sMax, gMin, gMax);
+           /* [sMin,sMax] and [gMin,gMax] must both contain
+              [ea,ea+szB) (right?)  That implies they must overlap at
+              at least over [ea,ea+szB). */
+           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
+           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
+           /* So now compute their intersection. */
+           uMin = Addr__max( sMin, gMin );
+           uMax = Addr__min( sMax, gMax );
+           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
+           tl_assert(uMin <= uMax);
+           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
+           /* Finally, we can park [uMin,uMax] in the cache.  However,
+              if uMax is ~0, we can't represent the difference; hence
+              fudge uMax. */
+           if (uMin < uMax && uMax == ~(UWord)0)
+              uMax--;
+           toadd_addr = uMin;
+           toadd_szB  = uMax - uMin + 1;
+           break;
+        }
+        default:
+           /* We should only be caching info for the above 3 cases */
+          tl_assert(0);
+     } /* switch (inv->tag) */
+
+     { /* and actually add this to the cache, finally */
+       Word i;
+       Word ip = cache->nInUse / 2; /* doesn't seem critical */
+
+       if (cache->nInUse < N_QCACHE)
+          cache->nInUse++;
+       for (i = cache->nInUse-1; i > ip; i--) {
+          cache->elems[i] = cache->elems[i-1];
+       }
+
+       tl_assert(toadd_szB > 0);
+       cache->elems[ip].addr = toadd_addr;
+       cache->elems[ip].szB  = toadd_szB;
+       cache->elems[ip].inv  = *inv;
+     }
+
+     if (show) QCache__pp(cache, "after upd");
+
+   }
+}
+
+
/* CALLED FROM GENERATED CODE.  Check one memory access of |sszB|
   bytes at 'ea', made by the instruction at 'ip', with stack pointer
   'sp' and frame pointer 'fp' at the time of the access.  sszB is
   negative for stores and positive for loads, so |sszB| is the true
   access size.  'ip_frameBlocks' is the (translation-time) stack
   block description for the frame containing 'ip'.

   Scheme: classify 'ea' into an Invar (stack / global / unknown).
   The first access made by each instruction *instance* merely records
   the resulting Invar; each subsequent access is compared against the
   recorded one, and any difference is reported as an error, after
   which the new observation replaces the recorded one (so the same
   complaint is not repeated endlessly). */
static 
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[128], bufA[128];  /* rendered "expected"/"actual" Invars */

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   /* The sign of sszB only encodes load-vs-store; from here on we
      just need the magnitude. */
   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (eq_Invar(&new_inv, inv))
      return;

   tl_assert(inv->tag != Inv_Unset);

   /* Mismatch: render both Invars as text and report. */
   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   sg_record_error_SorG( tid, ea, sszB, bufE, bufA );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}
+
+
+////////////////////////////////////////
+/* Primary push-a-new-frame routine.  Called indirectly from
+   generated code. */
+
/* High-water marks for the per-thread stack interval trees and the
   global interval tree, maintained below and reported (when enabled)
   via the debug message in shadowStack_new_frame. */
static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;

/* Push a new frame onto 'tid's shadow stack in response to a call
   instruction.  Records in the caller's frame the SP/FP at the call
   and the stack blocks the call brings into existence (needed again
   at unwind time), enters those blocks into the thread's stack
   interval tree, installs (or re-uses) a callee frame, and finally
   invalidates the thread's query cache. */
static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   /* sanity-check the caller frame's links and depth */
   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      /* Compute the actual addresses of the caller's stack blocks and
         enter them into this thread's stack interval tree. */
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid], 
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist*/ );
      /* maintain the tree-size high-water-mark statistics */
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg, 
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   /* Re-use a previously allocated callee frame if one is to hand
      (left over from an earlier, since-unwound call); otherwise
      allocate and link a fresh one. */
   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp    = sp_post_call_insn;
   callee->sp_at_call     = 0; // not actually required ..
   callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   /* debug-only: print an indented call trace */
   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}
+
+/* CALLED FROM GENERATED CODE */
+static
+VG_REGPARM(3)
+void helperc__new_frame ( Addr sp_post_call_insn,
+                          Addr fp_at_call_insn,
+                          Addr ip_post_call_insn,
+                          XArray* blocks_at_call_insn,
+                          Word sp_adjust )
+{
+   ThreadId tid = VG_(get_running_tid)();
+   Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
+   shadowStack_new_frame( tid,
+                          sp_at_call_insn,
+                          sp_post_call_insn,
+                          fp_at_call_insn,
+                          ip_post_call_insn,
+                          blocks_at_call_insn );
+}
+
+
+////////////////////////////////////////
+/* Primary remove-frame(s) routine.  Called indirectly from
+   generated code. */
+
/* Unwind 'tid's shadow stack to agree with the new stack pointer
   'sp_now': pop every non-root frame whose creation_sp is below
   sp_now.  Popped frame records are scrubbed but kept linked (via the
   caller's ->inner pointer) for re-use by later calls; only their II
   hash tables are freed.  After each pop, the stack blocks added by
   the corresponding call are deleted from the thread's stack interval
   tree.  If anything was popped, the thread's query cache is
   invalidated. */
__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      /* the root frame (no ->outer) can never be popped */
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      /* stop once this frame is still live w.r.t. the new SP */
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      /* free the hash table, unless it is the small embedded one */
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call == NULL) {
         } else {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      /* debug-only: print an indented marker per popped frame */
      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   /* Install the surviving frame, if we popped anything. */
   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}
+
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// Instrumentation                                          //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* What does instrumentation need to do?
+
+   - at each Call transfer, generate a call to shadowStack_new_frame
+     do this by manually inspecting the IR
+
+   - at each sp change, if the sp change is negative, 
+     call shadowStack_unwind
+     do this by asking for SP-change analysis
+
+   - for each memory referencing instruction,
+     call helperc__mem_access
+*/
+
+/* A complication: sg_ instrumentation and h_ instrumentation need to
+   be interleaved.  Since the latter is a lot more complex than the
+   former, we split the sg_ instrumentation here into four functions
+   and let the h_ instrumenter call the four functions as it goes.
+   Hence the h_ instrumenter drives the sg_ instrumenter.
+
+   To make this viable, the sg_ instrumenter carries what running
+   state it needs in 'struct _SGEnv'.  This is exported only
+   abstractly from this file.
+*/
+
/* Per-superblock instrumentation state, threaded through the four
   sg_instrument_* functions by the h_ instrumenter.  Exported only as
   an opaque 'struct _SGEnv' from sg_main.h. */
struct _SGEnv {
   /* the current insn's IP */
   Addr64 curr_IP;
   /* whether the above is actually known (False until the first
      IMark of the superblock has been seen) */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we basically can't really handle properly; and so we ignore all
      but the first ref. */
   Bool firstRef;
};
+
+
+/* --- Helper fns for instrumentation --- */
+
+static IRTemp gen_Get_SP ( IRSB*           bbOut,
+                           VexGuestLayout* layout,
+                           Int             hWordTy_szB )
+{
+   IRExpr* sp_expr;
+   IRTemp  sp_temp;
+   IRType  sp_type;
+   /* This in effect forces the host and guest word sizes to be the
+      same. */
+   tl_assert(hWordTy_szB == layout->sizeof_SP);
+   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
+   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
+   sp_temp = newIRTemp( bbOut->tyenv, sp_type );
+   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
+   return sp_temp;
+}
+
+static IRTemp gen_Get_FP ( IRSB*           bbOut,
+                           VexGuestLayout* layout,
+                           Int             hWordTy_szB )
+{
+   IRExpr* fp_expr;
+   IRTemp  fp_temp;
+   IRType  fp_type;
+   /* This in effect forces the host and guest word sizes to be the
+      same. */
+   tl_assert(hWordTy_szB == layout->sizeof_SP);
+   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
+   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
+   fp_temp = newIRTemp( bbOut->tyenv, fp_type );
+   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
+   return fp_temp;
+}
+
/* Emit, into 'bbOut', a call to helperc__mem_access describing a
   guest memory access at 'addr' of 'szB' bytes ('isStore' selects
   direction), made by the instruction at 'curr_IP'.  The run-time SP
   and FP are fetched into temps and passed along, together with the
   (translation-time) stack block descriptions for curr_IP's frame. */
static void instrument_mem_access ( IRSB*   bbOut, 
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

#if defined(VGA_x86)
   /* x86-only filter: skip instrumenting accesses made by common
      prologue/epilogue instruction sequences, recognised by peeking
      at the guest code bytes around curr_IP.  NOTE(review): this
      reads guest code directly at translation time and assumes it is
      mapped and readable -- confirm. */
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[-1] == 0x5d && p[0] == 0xc3) return;
     // pop %ebp; RET $imm16
     if (p[-1] == 0x5d && p[0] == 0xc2) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
      szB is negated for stores, so the helper recovers the direction
      of the access from its sign. */
   { IRTemp t_SP = gen_Get_SP( bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/, 
                             "helperc__mem_access", 
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}
+
+
+/* --- Instrumentation main (4 fns) --- */
+
+struct _SGEnv *  sg_instrument_init ( void )
+{
+   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
+                                   sizeof(struct _SGEnv));
+   tl_assert(env);
+   env->curr_IP       = 0;
+   env->curr_IP_known = False;
+   env->firstRef      = True;
+   return env;
+}
+
/* Release a state object previously made by sg_instrument_init. */
void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free( env );
}
+
/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'.  Each IMark records the current guest IP in 'env' and
   resets env->firstRef, so that only the *first* memory reference of
   each guest instruction gets instrumented -- multi-reference insns
   cannot be handled precisely, so all but the first ref are
   ignored. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env, 
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         /* new guest instruction: remember its IP, re-arm firstRef */
         env->curr_IP_known = True;
         env->curr_IP       = (Addr)st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access( 
               sbOut, 
               st->Ist.Store.addr, 
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         /* only instrument if the RHS is a load */
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               /* an Ifx_Modify counts as both a read and a write */
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access( 
                     sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access( 
                     sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            /* no memory effects: the address/size fields must be empty */
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}
+
+
/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block.  Only Ijk_Call jumps are
   instrumented: we emit a call to helperc__new_frame, passing the
   post-call SP and FP, the call's destination ('next' -- the IP once
   the call has been made, i.e. the callee's entry point), the
   caller's stack block descriptions, and the size of the return
   address the call pushed (one guest word). */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env, 
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      /* an empty description set is passed to the helper as NULL */
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      /* argument order matches helperc__new_frame's parameters */
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn), 
                         /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args ); 
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// end Instrumentation                                      //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// misc                                                     //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* Make a new empty stack frame that is suitable for being the
+   outermost frame in a stack.  It has a creation_sp of effectively
+   infinity, so it can never be removed. */
+static StackFrame* new_root_StackFrame ( void )
+{
+   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
+   VG_(memset)( sframe, 0, sizeof(*sframe) );
+   sframe->creation_sp = ~0UL;
+
+   /* This sets up .htab, .htab_size and .htab_used */
+   initialise_II_hash_table( sframe );
+
+   /* ->depth, ->outer, ->inner are 0, NULL, NULL */
+
+   return sframe;
+}
+
+/* Primary routine for setting up the shadow stack for a new thread.
+   Note that this is used to create not only child thread stacks, but
+   the root thread's stack too.  We create a new stack with
+   .creation_sp set to infinity, so that the outermost frame can never
+   be removed (by shadowStack_unwind).  The core calls this function
+   as soon as a thread is created.  We cannot yet get its SP value,
+   since that may not yet be set. */
/* Set up (or reset) the shadow stack and stack interval tree for
   thread 'child'.  'parent' is VG_INVALID_THREADID when this is the
   main thread's stack.  If the slot is being re-used from a previous
   thread with the same ThreadId, the old interval tree and all old
   frames are freed first. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this stack.  Just allocate an initial frame. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      /* walk out to the root frame ... */
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      /* ... then free the whole chain, walking inwards */
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}
+
+/* Once a thread is ready to go, the core calls here.  We take the
+   opportunity to push a second frame on its stack, with the
+   presumably valid SP value that is going to be used for the thread's
+   startup.  Hence we should always wind up with a valid outermost
+   frame for the thread. */
+static void shadowStack_set_initial_SP ( ThreadId tid )
+{
+   StackFrame* sf;
+   tl_assert(is_sane_TId(tid));
+   sf = shadowStacks[tid];
+   tl_assert(sf != NULL);
+   tl_assert(sf->outer == NULL);
+   tl_assert(sf->inner == NULL);
+   tl_assert(sf->creation_sp == ~0UL);
+   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
+                               0, VG_(get_IP)(tid), NULL );
+}
+
+
+//////////////////////////////////////////////////////////////
+//                                                          //
+// main-ish                                                 //
+//                                                          //
+//////////////////////////////////////////////////////////////
+
+/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
+   sp-change analysis, as requested in pc_pre_clo_int(). */
+void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
+   ThreadId  tid = VG_(get_running_tid)();
+   shadowStack_unwind( tid, old_SP+len );
+}
+
/* Tool initialisation, before command-line processing: set up the
   globals and the stack-block and global-block sets. */
void sg_pre_clo_init ( void )
{
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}
+
/* Tool initialisation, after command-line processing: nothing to do. */
void sg_post_clo_init ( void )
{
}
+
+void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
+   shadowStack_thread_create(parent, child);
+}
+
+void sg_pre_thread_first_insn ( ThreadId tid ) {
+   shadowStack_set_initial_SP(tid);
+}
+
/* Tool finalisation: print the access/classification/cache
   statistics, but only at verbosity 2 or above.  'exitcode' is
   accepted for interface uniformity and not used here. */
void sg_fini(Int exitcode)
{
   if (VG_(clo_verbosity) >= 2) {
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu total accesses, of which:", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stack0: %'12llu classify",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stackN: %'12llu classify",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:     global: %'12llu classify",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:    unknown: %'12llu classify",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg, 
         " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg, 
         " sg_:  htab-fast: %'llu hits",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg, 
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                sg_main.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/sg_main.h b/exp-ptrcheck/sg_main.h
new file mode 100644
index 0000000..b8cfb25
--- /dev/null
+++ b/exp-ptrcheck/sg_main.h
@@ -0,0 +1,77 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Ptrcheck: a pointer-use checker.                             ---*/
+/*--- Exports for stack and global access checking.                ---*/
+/*---                                                    sg_main.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Ptrcheck, a Valgrind tool for checking pointer
+   use in programs.
+
+   Copyright (C) 2008-2008 OpenWorks Ltd
+      info@open-works.co.uk
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
#ifndef __SG_MAIN_H

#define __SG_MAIN_H

/* Tool lifecycle hooks, called from the tool's top-level wrapper. */
void sg_pre_clo_init ( void );
void sg_post_clo_init ( void );
void sg_fini(Int exitcode);

/* Core event hooks: stack shrinkage and thread lifecycle. */
void sg_die_mem_stack ( Addr old_SP, SizeT len );
void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child );
void sg_pre_thread_first_insn ( ThreadId tid );

/* Address-space change hooks, for tracking mapped global memory. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle );
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle );
void sg_die_mem_munmap ( Addr a, SizeT len );

/* These really ought to be moved elsewhere, so that we don't have to
   include this file in h_main.c.  See comments in sg_main.c and
   h_main.c for what this is about. */

struct _SGEnv;  /* abstract export */

/* Create / destroy the per-superblock instrumentation state. */
struct _SGEnv *  sg_instrument_init ( void );

void sg_instrument_fini ( struct _SGEnv * env );

/* Instrument one statement; must be called before 'st' itself is
   added to 'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env, 
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy );

/* Instrument the superblock's final jump; must be the last
   instrumentation statement added to the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env, 
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy );
#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                sg_main.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/exp-ptrcheck/tests/Makefile.am b/exp-ptrcheck/tests/Makefile.am
new file mode 100644
index 0000000..9b6f46f
--- /dev/null
+++ b/exp-ptrcheck/tests/Makefile.am
@@ -0,0 +1,132 @@
+
+# For AM_FLAG_M3264_PRI
+include $(top_srcdir)/Makefile.flags.am
+
+#SUBDIRS = .
+#if VGP_X86_LINUX
+#SUBDIRS += x86
+#endif
+#if VGP_AMD64_LINUX
+#SUBDIRS += amd64
+#endif
+#if VGP_PPC32_LINUX
+#SUBDIRS += ppc32
+#endif
+#if VGP_PPC64_LINUX
+#SUBDIRS += ppc64
+#endif
+
+#DIST_SUBDIRS = ${VG_ARCH_ALL} .
+
+noinst_SCRIPTS = filter_stderr filter_add filter_suppgen \
+	sh_script
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	add.vgtest-disabled add.stderr.exp \
+	and.vgtest-disabled and.stderr.exp \
+	arith.vgtest-disabled arith.stderr.exp \
+	arith_include1.c arith_include2.c \
+	bad_percentify.vgtest bad_percentify.c \
+	bad_percentify.stdout.exp bad_percentify.stderr.exp-glibc28-amd64 \
+	base.vgtest \
+	base.stderr.exp-glibc25-amd64 base.stderr.exp-glibc25-x86 \
+	ccc.vgtest \
+	ccc.stderr.exp-glibc25-x86 ccc.stderr.exp-glibc25-amd64 \
+	ccc.stderr.exp-glibc27-x86 ccc.stderr.exp-glibc28-amd64 \
+	cmp.vgtest-disabled cmp.stderr.exp \
+	globalerr.vgtest globalerr.stdout.exp \
+	globalerr.stderr.exp-glibc28-amd64 \
+	fp.vgtest fp.stderr.exp \
+	hackedbz2.vgtest hackedbz2.stdout.exp \
+	hackedbz2.stderr.exp-glibc28-amd64 \
+	hp_bounds.vgtest hp_bounds.stderr.exp \
+	hp_dangle.vgtest hp_dangle.stderr.exp \
+	idiv.vgtest-disabled idiv.stderr.exp \
+	imul.vgtest-disabled imul.stderr.exp \
+	justify.vgtest justify.stderr.exp \
+	mm.vgtest-disabled mm.stderr.exp \
+	neg.vgtest-disabled neg.stderr.exp \
+	not.vgtest-disabled not.stderr.exp \
+	or.vgtest-disabled or.stderr.exp \
+	partial_bad.vgtest \
+	partial_bad.stderr.exp-glibc25-x86 \
+	partial_bad.stderr.exp-glibc25-amd64 \
+	partial_good.vgtest \
+	partial_good.stderr.exp-glibc25-x86 \
+	partial_good.stderr.exp-glibc25-amd64 \
+	pth_create.vgtest pth_create.stderr.exp \
+	pth_specific.vgtest pth_specific.stderr.exp \
+	realloc.vgtest \
+	realloc.stderr.exp-glibc25-x86 realloc.stderr.exp-glibc25-amd64 \
+	sh_script.vgtest-disabled sh_script.stderr.exp \
+	stackerr.vgtest stackerr.stdout.exp \
+	stackerr.stderr.exp-glibc28-amd64 stackerr.stderr.exp-glibc27-x86 \
+	strcpy.vgtest strcpy.stderr.exp \
+	strlen_bad.vgtest-disabled strlen_bad.stderr.exp \
+	strlen_good.vgtest-disabled strlen_good.stderr.exp \
+	sub.vgtest-disabled sub.stderr.exp \
+	supp.vgtest supp.stderr.exp supp.supp \
+	suppgen.vgtest-disabled suppgen.stderr.exp suppgen.stdin \
+	syscall.vgtest-disabled syscall.stderr.exp \
+	tricky.vgtest tricky.stderr.exp \
+	unaligned.vgtest \
+	unaligned.stderr.exp-glibc25-x86 unaligned.stderr.exp-glibc25-amd64 \
+	xor.vgtest-disabled xor.stderr.exp \
+	zero.vgtest zero.stderr.exp
+
+check_PROGRAMS = \
+	add and arith bad_percentify base ccc cmp fp \
+	globalerr hackedbz2 \
+	hp_bounds hp_dangle idiv imul \
+	justify mm not neg or partial pth_create pth_specific realloc \
+	stackerr \
+	strcpy strlen sub supp syscall tricky unaligned xor zero
+
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/include \
+		-I$(top_srcdir)/coregrind -I$(top_builddir)/include \
+		-I@VEX_DIR@/pub
+AM_CFLAGS   = $(WERROR) -Winline -Wall -Wshadow -g $(AM_FLAG_M3264_PRI)
+AM_CXXFLAGS = $(AM_CFLAGS)
+
+# To make it a bit more realistic, build hackedbz2.c with at 
+# least some optimisation.
+hackedbz2_CFLAGS	= $(AM_FLAG_M3264_PRI) $(AM_CFLAGS) -O -g
+
+# C ones
+add_SOURCES		= add.c
+and_SOURCES		= and.c
+arith_SOURCES		= arith.c
+bad_percentify_SOURCES	= bad_percentify.c
+base_SOURCES		= base.c
+cmp_SOURCES		= cmp.c
+fp_SOURCES		= fp.c
+globalerr_SOURCE	= globalerr.c
+hackedbz2_SOURCES	= hackedbz2.c
+hp_bounds_SOURCES	= hp_bounds.c
+hp_dangle_SOURCES	= hp_dangle.c
+idiv_SOURCES		= idiv.c
+imul_SOURCES		= imul.c
+justify_SOURCES		= justify.c
+mm_SOURCES		= mm.c
+neg_SOURCES		= neg.c
+not_SOURCES		= not.c
+or_SOURCES		= or.c
+partial_SOURCES		= partial.c
+pth_create_SOURCES	= pth_create.c
+pth_create_LDADD	= -lpthread
+pth_specific_SOURCES	= pth_specific.c
+pth_specific_LDADD	= -lpthread
+realloc_SOURCES		= realloc.c
+stackerr_SOURCES	= stackerr.c
+strcpy_SOURCES		= strcpy.c
+strlen_SOURCES		= strlen.c
+sub_SOURCES		= sub.c
+supp_SOURCES		= supp.c
+syscall_SOURCES		= syscall.c
+tricky_SOURCES		= tricky.c
+unaligned_SOURCES	= unaligned.c
+xor_SOURCES		= xor.c
+zero_SOURCES		= zero.c
+
+# C++ ones
+ccc_SOURCES		= ccc.cpp
diff --git a/exp-ptrcheck/tests/add.c b/exp-ptrcheck/tests/add.c
new file mode 100644
index 0000000..f861a62
--- /dev/null
+++ b/exp-ptrcheck/tests/add.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // ADD =========================================================
+   g(+, n,  n,  n);  // det, det
+
+   g(+, n,  p,  p);  // ok, det
+
+   g(+, p,  n,  p);  // ok, det
+
+   g(+, p,  p,  e);  // detected bad add;  det, det
+
+   g(+, n,  un, u);  // undet, undet
+   g(+, n,  up, u);  // ok, undet
+
+   g(+, un, n,  u);  // undet, undet
+   g(+, up, n,  u);  // ok, undet
+
+   g(+, un, un, u);  // undet, undet
+   g(+, un, up, u);  // undet, undet
+   g(+, up, un, u);  // undet, undet
+   g(+, up, up, u);  // undetected bad add; undet, undet
+
+   g(+, un, p,  u);  // undet, undet
+   g(+, up, p,  u);  // undetected bad add; undet, undet
+
+   g(+, p,  un, u);  // undet, undet
+   g(+, p,  up, u);  // undetected bad add; undet, undet
+  
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/add.stderr.exp b/exp-ptrcheck/tests/add.stderr.exp
new file mode 100644
index 0000000..e4e0f11
--- /dev/null
+++ b/exp-ptrcheck/tests/add.stderr.exp
@@ -0,0 +1,70 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (add.c:14)
+ Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (add.c:14)
+ Address 0x........ is not derived from any known block
+about to do 16 [0]
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (add.c:16)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 18 [0]
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (add.c:18)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+
+Invalid arguments to Add32/Add64
+   at 0x........: main (add.c:20)
+ Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (add.c:20)
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (add.c:20)
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+about to do 22 [-1]
+about to do 23 [0]
+about to do 23 [-1]
+about to do 25 [0]
+about to do 25 [-1]
+about to do 26 [0]
+about to do 26 [-1]
+about to do 28 [0]
+about to do 28 [-1]
+about to do 29 [0]
+about to do 29 [-1]
+about to do 30 [0]
+about to do 30 [-1]
+about to do 31 [0]
+about to do 31 [-1]
+about to do 33 [0]
+about to do 33 [-1]
+about to do 34 [0]
+about to do 34 [-1]
+about to do 36 [0]
+about to do 36 [-1]
+about to do 37 [0]
+about to do 37 [-1]
+
+ERROR SUMMARY: 7 errors from 7 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/add.vgtest-disabled b/exp-ptrcheck/tests/add.vgtest-disabled
new file mode 100644
index 0000000..1fb7c79
--- /dev/null
+++ b/exp-ptrcheck/tests/add.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: add
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/and.c b/exp-ptrcheck/tests/and.c
new file mode 100644
index 0000000..09a1ef5
--- /dev/null
+++ b/exp-ptrcheck/tests/and.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // Not testing the n&p-->p type cases, too hard to find an 'n' that gives
+   // something that looks like a pointer!  (Eg. if the pointer is
+   // 0x40c38000, masking to 0x40000000 won't give invalid memory, and
+   // masking below that, eg. to 0x00c38000 doesn't give a pointer result.)
+
+   // AND =========================================================
+   g(&, n,  n,  n);  // det, det
+
+   g(&, n,  p,  n);  // det, det
+   g(&, nFF,p,  n);  // ok, det
+
+   g(&, p,  n,  n);  // det, det
+   g(&, p,  nFF,n);  // ok, det
+
+   g(&, p,  p,  p);  // ok, det
+   g(&, p,  p2, e);  // bad AND detected;  det, det
+
+   g(&, n,  un, n);  // det, det
+   g(&, n,  up, n);  // det, det
+
+   g(&, un, n,  n);  // det, det
+   g(&, up, n,  n);  // det, det
+
+   g(&, un, un, u);  // undet, undet
+   g(&, un, up, n);  // det, det
+   g(&, up, un, n);  // det, det
+   g(&, up, up, u);  // ok,  undet
+   g(&, up, up2,u);  // undet, undet 
+
+   g(&, un, p,  n);  // det, det
+   g(&, up, p,  n);  // det, det (result doesn't look like a pointer)
+
+   g(&, p,  un, n);  // det, det
+   g(&, p,  up, u);  // det, det
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/and.stderr.exp b/exp-ptrcheck/tests/and.stderr.exp
new file mode 100644
index 0000000..9ef3730
--- /dev/null
+++ b/exp-ptrcheck/tests/and.stderr.exp
@@ -0,0 +1,184 @@
+
+about to do 19 [0]
+Invalid read of size 4
+   at 0x........: main (and.c:19)
+ Address 0x........ is not derived from any known block
+about to do 19 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:19)
+ Address 0x........ is not derived from any known block
+about to do 21 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:21)
+ Address 0x........ is not derived from any known block
+about to do 21 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:21)
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 24 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:24)
+ Address 0x........ is not derived from any known block
+about to do 24 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:24)
+ Address 0x........ is not derived from any known block
+about to do 25 [0]
+about to do 25 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:25)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 27 [0]
+about to do 27 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:27)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+
+Invalid arguments to And32/And64
+   at 0x........: main (and.c:28)
+ First arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+ Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 28 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:28)
+ Address 0x........ is not derived from any known block
+about to do 28 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:28)
+ Address 0x........ is not derived from any known block
+about to do 30 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:30)
+ Address 0x........ is not derived from any known block
+about to do 30 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:30)
+ Address 0x........ is not derived from any known block
+about to do 31 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:31)
+ Address 0x........ is not derived from any known block
+about to do 31 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:31)
+ Address 0x........ is not derived from any known block
+about to do 33 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:33)
+ Address 0x........ is not derived from any known block
+about to do 33 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:33)
+ Address 0x........ is not derived from any known block
+about to do 34 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:34)
+ Address 0x........ is not derived from any known block
+about to do 34 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:34)
+ Address 0x........ is not derived from any known block
+about to do 36 [0]
+about to do 36 [-1]
+about to do 37 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:37)
+ Address 0x........ is not derived from any known block
+about to do 37 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:37)
+ Address 0x........ is not derived from any known block
+about to do 38 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:38)
+ Address 0x........ is not derived from any known block
+about to do 38 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:38)
+ Address 0x........ is not derived from any known block
+about to do 39 [0]
+about to do 39 [-1]
+about to do 40 [0]
+about to do 40 [-1]
+about to do 42 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:42)
+ Address 0x........ is not derived from any known block
+about to do 42 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:42)
+ Address 0x........ is not derived from any known block
+about to do 43 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:43)
+ Address 0x........ is not derived from any known block
+about to do 43 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:43)
+ Address 0x........ is not derived from any known block
+about to do 45 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:45)
+ Address 0x........ is not derived from any known block
+about to do 45 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:45)
+ Address 0x........ is not derived from any known block
+about to do 46 [0]
+
+Invalid read of size 4
+   at 0x........: main (and.c:46)
+ Address 0x........ is not derived from any known block
+about to do 46 [-1]
+
+Invalid read of size 4
+   at 0x........: main (and.c:46)
+ Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 32 errors from 32 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/and.vgtest-disabled b/exp-ptrcheck/tests/and.vgtest-disabled
new file mode 100644
index 0000000..2a27612
--- /dev/null
+++ b/exp-ptrcheck/tests/and.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: and
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/arith.c b/exp-ptrcheck/tests/arith.c
new file mode 100644
index 0000000..32af88c
--- /dev/null
+++ b/exp-ptrcheck/tests/arith.c
@@ -0,0 +1,57 @@
+
+#include <stdlib.h>
+
+typedef unsigned long Ulong;
+
+int main(void)
+{
+   long* x = malloc(sizeof(long) * 10);
+   long* y = malloc(sizeof(long) * 10);
+   long* y2 = y + 3;
+
+   // ok -- same segment
+   long  w = y2 - y;
+
+   // ok -- different heap segments (result can only be used to index off
+   // 'x', but glibc's strcpy() does this...)
+   long* z = (long*)((long)x - (long)y);
+
+   w = (long)y2 + (long)y;           // bad (same segment)
+
+   w = (long)x  & (long)y;           // bad (different segments)
+
+   w = (long)y2 / (long)4;           // bad, but indistinguishable from
+                                     // acceptable '%' cases...
+
+   w = (long)y2 % (long)4;           // ok
+   w = (long)y2 % (long)y;           // bad -- modulor(?) is a pointer
+   w = (long)0xffffffff % (long)y;   // bad -- modulend(?) is a non-pointer
+
+   w = (Ulong)y2 % (Ulong)4;         // ok
+   w = (Ulong)y2 % (Ulong)y;         // bad -- modulor(?) is a pointer
+   w = (Ulong)0xffffffff % (Ulong)y; // bad -- modulend(?) is a non-pointer
+
+   w = (long)y * (long)y2;           // bad
+
+   w = (long)y >> (long)2;           // ok
+   w = (long)y << (long)2;           // ok
+
+   w = (long)y &  0xffff;            // ok
+   w = (long)y |  0xffff;            // ok
+   w = (long)y ^  (long)y2;          // ok
+
+   w = ~((long)y);                   // ok
+
+   w = -((long)y);                   // bad -- operand is a pointer
+
+   w = (long)x ^ (long)x;            // xor(ptr,ptr) --> constant (0)
+   z = x + w;                        // ok, because xor result was zero
+
+   w = (long)x ^ ((long)x+1);        // xor(ptr,ptr') --> constant (small)
+   z = x + w;                        // ok, because xor result was constant
+
+   w = (long)x ^ (long)y;            // xor(ptr,ptr') --> constant (small)
+   z = x + w;                        // ok, because xor result was constant
+
+   return (long)z;
+}
diff --git a/exp-ptrcheck/tests/arith.stderr.exp b/exp-ptrcheck/tests/arith.stderr.exp
new file mode 100644
index 0000000..4af05c5
--- /dev/null
+++ b/exp-ptrcheck/tests/arith.stderr.exp
@@ -0,0 +1,79 @@
+
+Invalid ADD
+   at 0x........: main (arith.c:19)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid AND
+   at 0x........: main (arith.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:8)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid IDIV
+   at 0x........: main (arith.c:27)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid IDIV
+   at 0x........: main (arith.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg not a pointer
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid DIV
+   at 0x........: main (arith.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid DIV
+   at 0x........: main (arith.c:32)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg not a pointer
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid IMUL
+   at 0x........: main (arith.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith.c:9)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+ERROR SUMMARY: 7 errors from 7 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/arith.vgtest-disabled b/exp-ptrcheck/tests/arith.vgtest-disabled
new file mode 100644
index 0000000..5a6f607
--- /dev/null
+++ b/exp-ptrcheck/tests/arith.vgtest-disabled
@@ -0,0 +1 @@
+prog: arith
diff --git a/exp-ptrcheck/tests/arith_include1.c b/exp-ptrcheck/tests/arith_include1.c
new file mode 100644
index 0000000..dfbbe6b
--- /dev/null
+++ b/exp-ptrcheck/tests/arith_include1.c
@@ -0,0 +1,17 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <string.h>
+
+static jmp_buf TTT_jmpbuf;
+
+void SEGV_handler(int signum)
+{
+   //fprintf(stderr, "segv caught\n");
+   __builtin_longjmp(TTT_jmpbuf, 1);
+}
+
+int up[10], up2[10];
+
diff --git a/exp-ptrcheck/tests/arith_include2.c b/exp-ptrcheck/tests/arith_include2.c
new file mode 100644
index 0000000..2cc7e41
--- /dev/null
+++ b/exp-ptrcheck/tests/arith_include2.c
@@ -0,0 +1,41 @@
+
+   // Comment "both" means tp[0] and tp[-1] are both bad.
+   // Otherwise only tp[-1] is bad.
+
+   #define TTT \
+      if (__builtin_setjmp(TTT_jmpbuf) == 0) \
+      { fprintf(stderr,  "about to do %d [0]\n", __LINE__); tn = tp[ 0]; } \
+      if (__builtin_setjmp(TTT_jmpbuf) == 0) \
+      { fprintf(stderr, "about to do %d [-1]\n", __LINE__); tn = tp[-1]; }
+
+   #define b(    a,  c)   tp = (long*)a;                    TTT
+   #define ui(op, a,  c)  tp = (long*)op(long)a;            TTT
+   #define g(op, a,b,c)   tp = (long*)((long)a op (long)b); TTT
+   #define UNU            __attribute__((unused))
+
+   struct sigaction sigsegv;
+   // Scratch values
+   long  a, tn;
+   long* tp;
+   
+   // Known pointers
+   long* p = malloc(sizeof(long)*10);  UNU long* p2 = malloc(sizeof(long)*10);
+   UNU long* pp = p;
+   // Unknown pointers
+//   long up[10], UNU up2[10];
+
+   // Known nonptrs;  make them zero and known
+   long n = a ^ a, UNU n2 = n+1, UNU n7F = 0x7fffffffUL, UNU nFF = ~n;
+   
+   // Unknown nonptrs;  make them zero but unknown
+   long un = 0x01100000UL, UNU un2 = un;
+
+   // Known nonptr, from pointerness range check
+   UNU long nn = 0;
+
+   // Install SEGV handler
+   memset(&sigsegv, 0, sizeof(sigsegv));
+   sigsegv.sa_handler = SEGV_handler;
+   sigsegv.sa_flags   = SA_NODEFER; /* so we can handle signal many times */
+   assert( 0 == sigemptyset( &sigsegv.sa_mask ) );
+   assert( 0 == sigaction(SIGSEGV, &sigsegv, NULL) );
diff --git a/exp-ptrcheck/tests/bad_percentify.c b/exp-ptrcheck/tests/bad_percentify.c
new file mode 100644
index 0000000..2567742
--- /dev/null
+++ b/exp-ptrcheck/tests/bad_percentify.c
@@ -0,0 +1,109 @@
+
+/* This demonstrates a stack overrun bug that exp-ptrcheck found while
+   running Valgrind itself (self hosting).  As at 12 Sept 08 this bug
+   is still in Valgrind. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+typedef  unsigned long long int  ULong;
+typedef  unsigned int            UInt;
+typedef  signed int              Int;
+typedef  char             Char;
+
+/* ---------------------------------------------------------------------
+   percentify()
+   ------------------------------------------------------------------ */
+
+/* This part excerpted from coregrind/m_libcbase.c */
+
+// Percentify n/m with d decimal places.  Includes the '%' symbol at the end.
+// Right justifies in 'buf'.
+void VG_percentify(ULong n, ULong m, UInt d, Int n_buf, char buf[]) 
+{
+   Int i, len, space;
+   ULong p1;
+   Char fmt[32];
+
+   if (m == 0) {
+      // Have to generate the format string in order to be flexible about
+      // the width of the field.
+      sprintf(fmt, "%%-%ds", n_buf);
+      // fmt is now "%-<n_buf>s" where <n_buf> is the field width
+      sprintf(buf, fmt, "--%");
+      return;
+   }
+   
+   p1 = (100*n) / m;
+    
+   if (d == 0) {
+      sprintf(buf, "%lld%%", p1);
+   } else {
+      ULong p2;
+      UInt  ex;
+      switch (d) {
+      case 1: ex = 10;    break;
+      case 2: ex = 100;   break;
+      case 3: ex = 1000;  break;
+      default: assert(0);
+      /* was: VG_(tool_panic)("Currently can only handle 3 decimal places"); */
+      }
+      p2 = ((100*n*ex) / m) % ex;
+      // Have to generate the format string in order to be flexible about
+      // the width of the post-decimal-point part.
+      sprintf(fmt, "%%lld.%%0%dlld%%%%", d);
+      // fmt is now "%lld.%0<d>lld%%" where <d> is 1,2,3...
+      sprintf(buf, fmt, p1, p2);
+   }
+
+   len = strlen(buf);
+   space = n_buf - len;
+   if (space < 0) space = 0;     /* Allow for v. small field_width */
+   i = len;
+
+   /* Right justify in field */
+   for (     ; i >= 0;    i--)  buf[i + space] = buf[i];
+   for (i = 0; i < space; i++)  buf[i] = ' ';
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Stats                                                ---*/
+/*------------------------------------------------------------*/
+
+/* This part excerpted from coregrind/m_translate.c */
+
+static UInt n_SP_updates_fast            = 0;
+static UInt n_SP_updates_generic_known   = 0;
+static UInt n_SP_updates_generic_unknown = 0;
+
+void VG_print_translation_stats ( void )
+{
+   Char buf[6];
+   UInt n_SP_updates = n_SP_updates_fast + n_SP_updates_generic_known
+                                         + n_SP_updates_generic_unknown;
+   VG_percentify(n_SP_updates_fast, n_SP_updates, 1, 6, buf);
+   printf(
+      "translate:            fast SP updates identified: %'u (%s)\n",
+      n_SP_updates_fast, buf );
+
+   VG_percentify(n_SP_updates_generic_known, n_SP_updates, 1, 6, buf);
+   printf(
+      "translate:   generic_known SP updates identified: %'u (%s)\n",
+      n_SP_updates_generic_known, buf );
+
+   VG_percentify(n_SP_updates_generic_unknown, n_SP_updates, 1, 6, buf);
+   printf(
+      "translate: generic_unknown SP updates identified: %'u (%s)\n",
+      n_SP_updates_generic_unknown, buf );
+}
+
+
+
+int main ( void )
+{
+  VG_print_translation_stats();
+  return 0;
+}
diff --git a/exp-ptrcheck/tests/bad_percentify.stderr.exp-glibc28-amd64 b/exp-ptrcheck/tests/bad_percentify.stderr.exp-glibc28-amd64
new file mode 100644
index 0000000..1cd498c
--- /dev/null
+++ b/exp-ptrcheck/tests/bad_percentify.stderr.exp-glibc28-amd64
@@ -0,0 +1,32 @@
+
+Invalid read of size 1
+   at 0x........: strlen (h_intercepts.c:109)
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: VG_print_translation_stats (bad_percentify.c:88)
+   by 0x........: main (bad_percentify.c:107)
+ Address 0x........ expected vs actual:
+ Expected: stack array "buf" in frame 3 back from here
+ Actual:   unknown
+
+Invalid read of size 1
+   at 0x........: strlen (h_intercepts.c:109)
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: VG_print_translation_stats (bad_percentify.c:93)
+   by 0x........: main (bad_percentify.c:107)
+ Address 0x........ expected vs actual:
+ Expected: stack array "buf" in frame 3 back from here
+ Actual:   unknown
+
+Invalid read of size 1
+   at 0x........: strlen (h_intercepts.c:109)
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: VG_print_translation_stats (bad_percentify.c:98)
+   by 0x........: main (bad_percentify.c:107)
+ Address 0x........ expected vs actual:
+ Expected: stack array "buf" in frame 3 back from here
+ Actual:   unknown
+
+ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/bad_percentify.stdout.exp b/exp-ptrcheck/tests/bad_percentify.stdout.exp
new file mode 100644
index 0000000..fd99c2c
--- /dev/null
+++ b/exp-ptrcheck/tests/bad_percentify.stdout.exp
@@ -0,0 +1,3 @@
+translate:            fast SP updates identified: 0 (--%   )
+translate:   generic_known SP updates identified: 0 (--%   )
+translate: generic_unknown SP updates identified: 0 (--%   )
diff --git a/exp-ptrcheck/tests/bad_percentify.vgtest b/exp-ptrcheck/tests/bad_percentify.vgtest
new file mode 100644
index 0000000..ab3e21d
--- /dev/null
+++ b/exp-ptrcheck/tests/bad_percentify.vgtest
@@ -0,0 +1 @@
+prog: bad_percentify
diff --git a/exp-ptrcheck/tests/base.c b/exp-ptrcheck/tests/base.c
new file mode 100644
index 0000000..e24b57b
--- /dev/null
+++ b/exp-ptrcheck/tests/base.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+   
+   // Base ========================================================
+   b(p,  p);         // ok
+
+   b(up, u);         // ok
+
+   b(un, u);         // undet
+
+   b(n,  n);         // det
+
+   b(nn, n);         // det
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/base.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/base.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..f6924d4
--- /dev/null
+++ b/exp-ptrcheck/tests/base.stderr.exp-glibc25-amd64
@@ -0,0 +1,35 @@
+
+about to do 14 [0]
+about to do 14 [-1]
+Invalid read of size 8
+   at 0x........: main (base.c:14)
+ Address 0x........ is 8 bytes before the accessing pointer's
+ legitimate range, a block of size 80 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 16 [0]
+about to do 16 [-1]
+about to do 18 [0]
+about to do 18 [-1]
+about to do 20 [0]
+
+Invalid read of size 8
+   at 0x........: main (base.c:20)
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 8
+   at 0x........: main (base.c:20)
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 8
+   at 0x........: main (base.c:22)
+ Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 8
+   at 0x........: main (base.c:22)
+ Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/base.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/base.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..01dc9ff
--- /dev/null
+++ b/exp-ptrcheck/tests/base.stderr.exp-glibc25-x86
@@ -0,0 +1,35 @@
+
+about to do 14 [0]
+about to do 14 [-1]
+Invalid read of size 4
+   at 0x........: main (base.c:14)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 16 [0]
+about to do 16 [-1]
+about to do 18 [0]
+about to do 18 [-1]
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (base.c:20)
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (base.c:20)
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 4
+   at 0x........: main (base.c:22)
+ Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (base.c:22)
+ Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/base.vgtest b/exp-ptrcheck/tests/base.vgtest
new file mode 100644
index 0000000..4359690
--- /dev/null
+++ b/exp-ptrcheck/tests/base.vgtest
@@ -0,0 +1,2 @@
+prog: base
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/ccc.cpp b/exp-ptrcheck/tests/ccc.cpp
new file mode 100644
index 0000000..e14f956
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.cpp
@@ -0,0 +1,41 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <malloc.h> // for memalign()
+static __attribute__((noinline)) void bar ( int ); /* fwds */
+int main(void)
+{
+   int  y1, y2, y3, y4, y5, y6, sum = 0;
+   int* x1 = (int*)malloc(sizeof(int));
+   int* x2 = new int;
+   int* x3 = new int[10];
+   int* x4 = (int*)calloc(1, sizeof(int));
+   int* x5 = (int*)memalign(8, sizeof(int));
+   int* x6;  void* v6;
+   int res = posix_memalign(&v6, 8, sizeof(int)); x6 = (int*)v6;
+
+   assert(NULL != x1 && NULL != x2 && NULL != x3 && NULL != x4 &&
+          NULL != x5 && 0 == res);
+   __asm__ __volatile__("":::"memory");
+   // all underruns
+   sum += x1[-1]; __asm__ __volatile__("":::"memory"); bar(1);
+   sum += x2[-1]; __asm__ __volatile__("":::"memory"); bar(2);
+   sum += x3[-1]; __asm__ __volatile__("":::"memory"); bar(3);
+   sum += x4[-1]; __asm__ __volatile__("":::"memory"); bar(4);
+   sum += x5[-1]; __asm__ __volatile__("":::"memory"); bar(5);
+   sum += x6[-1]; __asm__ __volatile__("":::"memory"); bar(6);
+   __asm__ __volatile__("":::"memory");
+   return sum;
+}
+
+/* What's with all this __asm__ __volatile__ stuff?  Well, it's an
+   attempt to get gcc-4.1.2 not to claim the memory references that
+   we're interested in -- x1[-1] through x6[-1] -- appear on different
+   lines than they really do.  By its own rules, gcc can't move code
+   across an __asm__ __volatile__, and the "memory" item says each one
+   clobbers memory in some way which gcc can't know, so that probably
+   (!)  persuades it not to carry memory CSEs around either. */
+
+static __attribute__((noinline)) void bar ( int x )
+{
+   __asm__ __volatile__("":::"memory");
+}
diff --git a/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..f11b217
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-amd64
@@ -0,0 +1,45 @@
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:20)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:8)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:21)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: operator new(unsigned long) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:9)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: operator new[](unsigned long) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:10)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: calloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:11)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:23)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:12)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:24)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: posix_memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:14)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..ab57a78
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.stderr.exp-glibc25-x86
@@ -0,0 +1,45 @@
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:20)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:8)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:21)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: operator new(unsigned) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:9)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: operator new[](unsigned) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:10)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: calloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:11)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:23)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:12)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:24)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: posix_memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:14)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/ccc.stderr.exp-glibc27-x86 b/exp-ptrcheck/tests/ccc.stderr.exp-glibc27-x86
new file mode 100644
index 0000000..3e58b97
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.stderr.exp-glibc27-x86
@@ -0,0 +1,45 @@
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:20)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:8)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:21)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: operator new(unsigned) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:9)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: operator new[](unsigned) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:10)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:23)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: calloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:11)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:24)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:12)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:25)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: posix_memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:14)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/ccc.stderr.exp-glibc28-amd64 b/exp-ptrcheck/tests/ccc.stderr.exp-glibc28-amd64
new file mode 100644
index 0000000..64e471a
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.stderr.exp-glibc28-amd64
@@ -0,0 +1,45 @@
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:20)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:8)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:21)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: operator new(unsigned long) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:9)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: operator new[](unsigned long) (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:10)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:23)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: calloc (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:11)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:24)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:12)
+
+Invalid read of size 4
+   at 0x........: main (ccc.cpp:22)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: memalign (vg_replace_malloc.c:...)
+   by 0x........: posix_memalign (vg_replace_malloc.c:...)
+   by 0x........: main (ccc.cpp:14)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/ccc.vgtest b/exp-ptrcheck/tests/ccc.vgtest
new file mode 100644
index 0000000..7c0c728
--- /dev/null
+++ b/exp-ptrcheck/tests/ccc.vgtest
@@ -0,0 +1 @@
+prog: ccc
diff --git a/exp-ptrcheck/tests/cmp.c b/exp-ptrcheck/tests/cmp.c
new file mode 100644
index 0000000..049295b
--- /dev/null
+++ b/exp-ptrcheck/tests/cmp.c
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+
+// The comparisons use SUB instructions, and this can result in having a
+// (nonptr - ptr) situation legitimately;  at one point I was flagging
+// errors when that happened.
+
+int main(void)
+{
+   char* buf = malloc(sizeof(char) * 6);
+
+   // Known zero non-pointer
+   char* nz = (char*)((long)buf^(long)buf);  // known non-pointer
+
+   // Unknown zero nonptr;  make them zero but unknown
+   char* unz;
+   ((char*)&unz)[0] = '\0';
+   ((char*)&unz)[1] = '\0';
+   ((char*)&unz)[2] = '\0';
+   ((char*)&unz)[3] = '\0';
+
+   if (buf == nz)  return 1;
+   if (nz  == buf) return 1;     // --> n - p, but legitimate
+   if (buf == unz) return 1;
+   if (unz == buf) return 1;
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/cmp.stderr.exp b/exp-ptrcheck/tests/cmp.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/cmp.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/cmp.vgtest-disabled b/exp-ptrcheck/tests/cmp.vgtest-disabled
new file mode 100644
index 0000000..9b73f14
--- /dev/null
+++ b/exp-ptrcheck/tests/cmp.vgtest-disabled
@@ -0,0 +1 @@
+prog: cmp
diff --git a/exp-ptrcheck/tests/filter_add b/exp-ptrcheck/tests/filter_add
new file mode 100755
index 0000000..ad8fbbc
--- /dev/null
+++ b/exp-ptrcheck/tests/filter_add
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/filter_stderr | 
+
+# Anonymise "before" distances (if greater than 9 bytes)
+sed "s/Address 0x........ is [0-9][0-9]\+ bytes /Address 0x........ is ... bytes /"
diff --git a/exp-ptrcheck/tests/filter_stderr b/exp-ptrcheck/tests/filter_stderr
new file mode 100755
index 0000000..c679ff1
--- /dev/null
+++ b/exp-ptrcheck/tests/filter_stderr
@@ -0,0 +1,34 @@
+#! /bin/sh
+
+dir=`dirname $0`
+
+$dir/../../tests/filter_stderr_basic                    |
+
+# Anonymise addresses
+$dir/../../tests/filter_addresses                       |
+
+$dir/../../tests/filter_test_paths                      |
+
+# Anonymise paths like "(in /foo/bar/libc-baz.so)"
+sed "s/(in \/.*libc.*)$/(in \/...libc...)/"             |
+sed "s/(in \/.*libpthread.*)$/(in \/...libpthread...)/"             |
+
+# Anonymise paths like "__libc_start_main (../foo/bar/libc-quux.c:129)"
+sed "s/__libc_\(.*\) (.*)$/__libc_\1 (...libc...)/" |
+
+# Remove preambly stuff
+sed \
+-e "/^exp-ptrcheck, a heap, stack & global array overrun detector\.$/d" \
+-e "/^NOTE: This is an Experimental-Class Valgrind Tool.$/d"  \
+-e "/^Copyright (C) 2003-200., and GNU GPL'd, by OpenWorks Ltd et al.$/d" |
+
+# Tidy up in cases where glibc (+ libdl + libpthread + ld) have
+# been built with debugging information, hence source locs are present
+sed \
+-e "s/vfprintf ([a-z]*printf.c:[0-9]*)/.../" \
+-e "s/vsprintf ([a-z]*printf.c:[0-9]*)/.../" \
+-e "s/sprintf (sprintf.c:[0-9]*)/.../" \
+-e "s/printf (printf.c:[0-9]*)/.../" \
+-e "s/strdup (strdup.c:[0-9]*)/.../" \
+-e "s/pthread_key_create.c:[0-9]*/in \/...libpthread.../" \
+-e "s/genops.c:[0-9]*/in \/...libc.../"
diff --git a/exp-ptrcheck/tests/filter_suppgen b/exp-ptrcheck/tests/filter_suppgen
new file mode 100755
index 0000000..6a95de5
--- /dev/null
+++ b/exp-ptrcheck/tests/filter_suppgen
@@ -0,0 +1,11 @@
+#! /bin/sh
+
+
+dir=`dirname $0`
+
+$dir/filter_stderr | 
+
+# Anonymise "obj:" path
+sed "s/obj:.*\/annelid\/tests\/supp/obj:*\/annelid\/tests\/supp/"
+
+
diff --git a/exp-ptrcheck/tests/fp.c b/exp-ptrcheck/tests/fp.c
new file mode 100644
index 0000000..8bcf1ad
--- /dev/null
+++ b/exp-ptrcheck/tests/fp.c
@@ -0,0 +1,17 @@
+
+#include <stdlib.h>
+
+int main ( void )
+{
+   double* dp = malloc(sizeof(double));
+   float*  fp = malloc(sizeof(float));
+
+   *dp += 3.0;    // ok
+   *fp += 30.0;   // ok
+   free(dp);
+   free(fp);
+   *dp += 3.0;    // bad, been freed
+   *fp += 30.0;   // bad, been freed
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/fp.stderr.exp b/exp-ptrcheck/tests/fp.stderr.exp
new file mode 100644
index 0000000..bc5032b
--- /dev/null
+++ b/exp-ptrcheck/tests/fp.stderr.exp
@@ -0,0 +1,30 @@
+
+Invalid read of size 8
+   at 0x........: main (fp.c:13)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 8 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (fp.c:11)
+
+Invalid write of size 8
+   at 0x........: main (fp.c:13)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 8 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (fp.c:11)
+
+Invalid read of size 4
+   at 0x........: main (fp.c:14)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 4 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (fp.c:12)
+
+Invalid write of size 4
+   at 0x........: main (fp.c:14)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 4 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (fp.c:12)
+
+ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/fp.vgtest b/exp-ptrcheck/tests/fp.vgtest
new file mode 100644
index 0000000..d178745
--- /dev/null
+++ b/exp-ptrcheck/tests/fp.vgtest
@@ -0,0 +1 @@
+prog: fp
diff --git a/exp-ptrcheck/tests/globalerr.c b/exp-ptrcheck/tests/globalerr.c
new file mode 100644
index 0000000..5b42763
--- /dev/null
+++ b/exp-ptrcheck/tests/globalerr.c
@@ -0,0 +1,15 @@
+
+#include <stdio.h>
+
+short a[7];
+static short b[7];
+
+int main ( void )
+{
+  int i;
+  short sum;
+  for (i = 0; i < 7+1; i++) {
+     sum += a[i] * b[i];
+  }
+  return 1 & ((unsigned int)sum / 1000000);
+}
diff --git a/exp-ptrcheck/tests/globalerr.stderr.exp-glibc28-amd64 b/exp-ptrcheck/tests/globalerr.stderr.exp-glibc28-amd64
new file mode 100644
index 0000000..75c13f7
--- /dev/null
+++ b/exp-ptrcheck/tests/globalerr.stderr.exp-glibc28-amd64
@@ -0,0 +1,14 @@
+
+Invalid read of size 2
+   at 0x........: main (globalerr.c:12)
+ Address 0x........ expected vs actual:
+ Expected: global array "a" in object with soname "NONE"
+ Actual:   unknown
+
+Invalid read of size 2
+   at 0x........: main (globalerr.c:12)
+ Address 0x........ expected vs actual:
+ Expected: global array "b" in object with soname "NONE"
+ Actual:   unknown
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/globalerr.stdout.exp b/exp-ptrcheck/tests/globalerr.stdout.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/exp-ptrcheck/tests/globalerr.stdout.exp
diff --git a/exp-ptrcheck/tests/globalerr.vgtest b/exp-ptrcheck/tests/globalerr.vgtest
new file mode 100644
index 0000000..f75fcee
--- /dev/null
+++ b/exp-ptrcheck/tests/globalerr.vgtest
@@ -0,0 +1 @@
+prog: globalerr
diff --git a/exp-ptrcheck/tests/hackedbz2.c b/exp-ptrcheck/tests/hackedbz2.c
new file mode 100644
index 0000000..76b3ad2
--- /dev/null
+++ b/exp-ptrcheck/tests/hackedbz2.c
@@ -0,0 +1,6538 @@
+
+/* This is a very slightly modified version of perf/bz2.c, with a
+   single change that causes it to overrun a global array by one byte.
+   The change in question is a change of the size of myprintf_buf from
+   1000 to 70, at line 1278.  ptrcheck should report exactly one
+   error, resulting from an out of range access to this array. */
+
+// This benchmark is basically bzip2 (mashed to be a single file)
+// compressing and decompressing some data.  It tests Valgrind's handling of
+// realistic and "difficult" (ie. lots of branches and memory accesses)
+// integer code.  Execution is spread out over quite a few basic blocks; 
+// --profile-flags indicates that to get to the top 90%th percentile of
+// dynamic BB counts requires considering the top 51 basic blocks
+
+// This program can be used both as part of the performance test
+// suite, in which case we want it to run for quite a while,
+// and as part of the regression (correctness) test suite, in
+// which case we want it to run quickly and be verbose.
+// So it does the latter iff given a command line arg.
+
+// Licensing: the code within is mostly taken from bzip2, which has a BSD
+// license.  There is a little code from VEX, which is licensed under GPLv2
+// And it's all written by Julian Seward.
+
+#define BZ_NO_STDIO
+
+
+/*-------------------------------------------------------------*/
+/*--- Private header file for the library.                  ---*/
+/*---                                       bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+#ifndef _BZLIB_PRIVATE_H
+#define _BZLIB_PRIVATE_H
+
+#include <stdlib.h>
+
+#ifndef BZ_NO_STDIO
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- Public header file for the library.                   ---*/
+/*---                                               bzlib.h ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+#ifndef _BZLIB_H
+#define _BZLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BZ_RUN               0
+#define BZ_FLUSH             1
+#define BZ_FINISH            2
+
+#define BZ_OK                0
+#define BZ_RUN_OK            1
+#define BZ_FLUSH_OK          2
+#define BZ_FINISH_OK         3
+#define BZ_STREAM_END        4
+#define BZ_SEQUENCE_ERROR    (-1)
+#define BZ_PARAM_ERROR       (-2)
+#define BZ_MEM_ERROR         (-3)
+#define BZ_DATA_ERROR        (-4)
+#define BZ_DATA_ERROR_MAGIC  (-5)
+#define BZ_IO_ERROR          (-6)
+#define BZ_UNEXPECTED_EOF    (-7)
+#define BZ_OUTBUFF_FULL      (-8)
+#define BZ_CONFIG_ERROR      (-9)
+
+typedef 
+   struct {
+      char *next_in;
+      unsigned int avail_in;
+      unsigned int total_in_lo32;
+      unsigned int total_in_hi32;
+
+      char *next_out;
+      unsigned int avail_out;
+      unsigned int total_out_lo32;
+      unsigned int total_out_hi32;
+
+      void *state;
+
+      void *(*bzalloc)(void *,int,int);
+      void (*bzfree)(void *,void *);
+      void *opaque;
+   } 
+   bz_stream;
+
+
+#ifndef BZ_IMPORT
+#define BZ_EXPORT
+#endif
+
+#ifndef BZ_NO_STDIO
+/* Need a definition for FILE */
+#include <stdio.h>
+#endif
+
+#ifdef _WIN32
+#   include <windows.h>
+#   ifdef small
+      /* windows.h define small to char */
+#      undef small
+#   endif
+#   ifdef BZ_EXPORT
+#   define BZ_API(func) WINAPI func
+#   define BZ_EXTERN extern
+#   else
+   /* import windows dll dynamically */
+#   define BZ_API(func) (WINAPI * func)
+#   define BZ_EXTERN
+#   endif
+#else
+#   define BZ_API(func) func
+#   define BZ_EXTERN extern
+#endif
+
+
+/*-- Core (low-level) library functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( 
+      bz_stream* strm, 
+      int        blockSize100k, 
+      int        verbosity, 
+      int        workFactor 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompress) ( 
+      bz_stream* strm, 
+      int action 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( 
+      bz_stream* strm 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( 
+      bz_stream *strm, 
+      int       verbosity, 
+      int       small
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( 
+      bz_stream* strm 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( 
+      bz_stream *strm 
+   );
+
+
+
+/*-- High(er) level library functions --*/
+
+#ifndef BZ_NO_STDIO
+#define BZ_MAX_UNUSED 5000
+
+typedef void BZFILE;
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( 
+      int*  bzerror,   
+      FILE* f, 
+      int   verbosity, 
+      int   small,
+      void* unused,    
+      int   nUnused 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( 
+      int*    bzerror, 
+      BZFILE* b 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void**  unused,  
+      int*    nUnused 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzRead) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( 
+      int*  bzerror,      
+      FILE* f, 
+      int   blockSize100k, 
+      int   verbosity, 
+      int   workFactor 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWrite) ( 
+      int*    bzerror, 
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( 
+      int*          bzerror, 
+      BZFILE*       b, 
+      int           abandon, 
+      unsigned int* nbytes_in, 
+      unsigned int* nbytes_out 
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( 
+      int*          bzerror, 
+      BZFILE*       b, 
+      int           abandon, 
+      unsigned int* nbytes_in_lo32, 
+      unsigned int* nbytes_in_hi32, 
+      unsigned int* nbytes_out_lo32, 
+      unsigned int* nbytes_out_hi32
+   );
+#endif
+
+
+/*-- Utility functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( 
+      char*         dest, 
+      unsigned int* destLen,
+      char*         source, 
+      unsigned int  sourceLen,
+      int           blockSize100k, 
+      int           verbosity, 
+      int           workFactor 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( 
+      char*         dest, 
+      unsigned int* destLen,
+      char*         source, 
+      unsigned int  sourceLen,
+      int           small, 
+      int           verbosity 
+   );
+
+
+/*--
+   Code contributed by Yoshioka Tsuneo
+   (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp),
+   to support better zlib compatibility.
+   This code is not _officially_ part of libbzip2 (yet);
+   I haven't tested it, documented it, or considered the
+   threading-safeness of it.
+   If this code breaks, please contact both Yoshioka and me.
+--*/
+
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
+      void
+   );
+
+#ifndef BZ_NO_STDIO
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
+      const char *path,
+      const char *mode
+   );
+
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
+      int        fd,
+      const char *mode
+   );
+         
+BZ_EXTERN int BZ_API(BZ2_bzread) (
+      BZFILE* b, 
+      void* buf, 
+      int len 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
+      BZFILE* b, 
+      void*   buf, 
+      int     len 
+   );
+
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
+      BZFILE* b
+   );
+
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
+      BZFILE* b
+   );
+
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
+      BZFILE *b, 
+      int    *errnum
+   );
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*-------------------------------------------------------------*/
+/*--- end                                           bzlib.h ---*/
+/*-------------------------------------------------------------*/
+
+
+
+
+/*-- General stuff. --*/
+
+#define BZ_VERSION  "1.0.3, 17-Oct-2004"
+
+typedef char            Char;
+typedef unsigned char   Bool;
+typedef unsigned char   UChar;
+typedef int             Int32;
+typedef unsigned int    UInt32;
+typedef short           Int16;
+typedef unsigned short  UInt16;
+
+#define True  ((Bool)1)
+#define False ((Bool)0)
+
+#ifndef __GNUC__
+#define __inline__  /* */
+#endif 
+
+#ifndef BZ_NO_STDIO
+extern void BZ2_bz__AssertH__fail ( int errcode );
+#define AssertH(cond,errcode) \
+   { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
+#if BZ_DEBUG
+#define AssertD(cond,msg) \
+   { if (!(cond)) {       \
+      fprintf ( stderr,   \
+        "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
+      exit(1); \
+   }}
+#else
+#define AssertD(cond,msg) /* */
+#endif
+#define VPrintf0(zf) \
+   fprintf(stderr,zf)
+#define VPrintf1(zf,za1) \
+   fprintf(stderr,zf,za1)
+#define VPrintf2(zf,za1,za2) \
+   fprintf(stderr,zf,za1,za2)
+#define VPrintf3(zf,za1,za2,za3) \
+   fprintf(stderr,zf,za1,za2,za3)
+#define VPrintf4(zf,za1,za2,za3,za4) \
+   fprintf(stderr,zf,za1,za2,za3,za4)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) \
+   fprintf(stderr,zf,za1,za2,za3,za4,za5)
+#else
+extern void bz_internal_error ( int errcode );
+#define AssertH(cond,errcode) \
+   { if (!(cond)) bz_internal_error ( errcode ); }
+#define AssertD(cond,msg) /* */
+#define VPrintf0(zf) \
+   vex_printf(zf)
+#define VPrintf1(zf,za1) \
+   vex_printf(zf,za1)
+#define VPrintf2(zf,za1,za2) \
+   vex_printf(zf,za1,za2)
+#define VPrintf3(zf,za1,za2,za3) \
+   vex_printf(zf,za1,za2,za3)
+#define VPrintf4(zf,za1,za2,za3,za4) \
+   vex_printf(zf,za1,za2,za3,za4)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) \
+   vex_printf(zf,za1,za2,za3,za4,za5)
+#endif
+
+
+#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
+#define BZFREE(ppp)  (strm->bzfree)(strm->opaque,(ppp))
+
+
+/*-- Header bytes. --*/
+
+#define BZ_HDR_B 0x42   /* 'B' */
+#define BZ_HDR_Z 0x5a   /* 'Z' */
+#define BZ_HDR_h 0x68   /* 'h' */
+#define BZ_HDR_0 0x30   /* '0' */
+  
+/*-- Constants for the back end. --*/
+
+#define BZ_MAX_ALPHA_SIZE 258
+#define BZ_MAX_CODE_LEN    23
+
+#define BZ_RUNA 0
+#define BZ_RUNB 1
+
+#define BZ_N_GROUPS 6
+#define BZ_G_SIZE   50
+#define BZ_N_ITERS  4
+
+#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
+
+
+
+/*-- Stuff for randomising repetitive blocks. --*/
+
/* Table of pseudo-random counter reload values used to decode
   "randomised" blocks (consumed by BZ_RAND_UPD_MASK below). */
extern Int32 BZ2_rNums[512];

/* Declares the randomisation counters carried in EState/DState.
   Deliberately has no trailing semicolon (callers write
   "BZ_RAND_DECLS;"); note the trailing backslash continues onto the
   following blank line. */
#define BZ_RAND_DECLS                          \
   Int32 rNToGo;                               \
   Int32 rTPos                                 \

#define BZ_RAND_INIT_MASK                      \
   s->rNToGo = 0;                              \
   s->rTPos  = 0                               \

/* 1 exactly when the current counter is about to expire. */
#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)

/* Step the randomisation state: when the counter hits 0, reload it
   from BZ2_rNums (cycling through all 512 entries), then decrement. */
#define BZ_RAND_UPD_MASK                       \
   if (s->rNToGo == 0) {                       \
      s->rNToGo = BZ2_rNums[s->rTPos];         \
      s->rTPos++;                              \
      if (s->rTPos == 512) s->rTPos = 0;       \
   }                                           \
   s->rNToGo--;
+
+
+
+/*-- Stuff for doing CRCs. --*/
+
/* 256-entry lookup table for the bzip2 CRC32. */
extern UInt32 BZ2_crc32Table[256];

/* Start value: all-ones, finalised by inversion below. */
#define BZ_INITIALISE_CRC(crcVar)              \
{                                              \
   crcVar = 0xffffffffL;                       \
}

#define BZ_FINALISE_CRC(crcVar)                \
{                                              \
   crcVar = ~(crcVar);                         \
}

/* Fold one byte into the running CRC; table-driven, indexing by the
   top byte (MSB-first variant). */
#define BZ_UPDATE_CRC(crcVar,cha)              \
{                                              \
   crcVar = (crcVar << 8) ^                    \
            BZ2_crc32Table[(crcVar >> 24) ^    \
                           ((UChar)cha)];      \
}
+
+
+
+/*-- States and modes for compression. --*/
+
/* Compressor stream mode (EState.mode). */
#define BZ_M_IDLE      1
#define BZ_M_RUNNING   2
#define BZ_M_FLUSHING  3
#define BZ_M_FINISHING 4

/* Whether the compressor is draining output or gathering input
   (EState.state). */
#define BZ_S_OUTPUT    1
#define BZ_S_INPUT     2

/* Sorting parameters; BZ_N_OVERSHOOT is presumably the slack
   appended after the block for the sorter's benefit -- the sorting
   code is not in this chunk, confirm there. */
#define BZ_N_RADIX 2
#define BZ_N_QSORT 12
#define BZ_N_SHELL 18
#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
+
+
+
+
+/*-- Structure holding all the compression-side stuff. --*/
+
/* Whole-stream compressor state.  Fields are documented inline;
   BZ_RAND_DECLS expands to the rNToGo/rTPos randomisation counters
   (see the macro above). */
typedef
   struct {
      /* pointer back to the struct bz_stream */
      bz_stream* strm;

      /* mode this stream is in, and whether inputting */
      /* or outputting data */
      Int32    mode;
      Int32    state;

      /* remembers avail_in when flush/finish requested */
      UInt32   avail_in_expect;

      /* for doing the block sorting */
      UInt32*  arr1;
      UInt32*  arr2;
      UInt32*  ftab;
      Int32    origPtr;

      /* aliases for arr1 and arr2 */
      UInt32*  ptr;
      UChar*   block;
      UInt16*  mtfv;
      UChar*   zbits;

      /* for deciding when to use the fallback sorting algorithm */
      Int32    workFactor;

      /* run-length-encoding of the input */
      UInt32   state_in_ch;
      Int32    state_in_len;
      BZ_RAND_DECLS;

      /* input and output limits and current posns */
      Int32    nblock;
      Int32    nblockMAX;
      Int32    numZ;
      Int32    state_out_pos;

      /* map of bytes used in block */
      Int32    nInUse;
      Bool     inUse[256];
      UChar    unseqToSeq[256];

      /* the buffer for bit stream creation */
      UInt32   bsBuff;
      Int32    bsLive;

      /* block and combined CRCs */
      UInt32   blockCRC;
      UInt32   combinedCRC;

      /* misc administratium */
      Int32    verbosity;
      Int32    blockNo;
      Int32    blockSize100k;

      /* stuff for coding the MTF values */
      Int32    nMTF;
      Int32    mtfFreq    [BZ_MAX_ALPHA_SIZE];
      UChar    selector   [BZ_MAX_SELECTORS];
      UChar    selectorMtf[BZ_MAX_SELECTORS];

      UChar    len     [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      Int32    code    [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      Int32    rfreq   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      /* second dimension: only 3 needed; 4 makes index calculations faster */
      UInt32   len_pack[BZ_MAX_ALPHA_SIZE][4];

   }
   EState;
+
+
+
+/*-- externs for compression. --*/
+
/* Compression-side entry points (block sorting, block coding, bit
   stream setup, Huffman code construction).  Their implementations
   are not in this chunk. */
extern void 
BZ2_blockSort ( EState* );

extern void 
BZ2_compressBlock ( EState*, Bool );

extern void 
BZ2_bsInitWrite ( EState* );

extern void 
BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );

extern void 
BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+
+
+
+/*-- states for decompression. --*/
+
/* Decompression state machine labels (DState.state).  Each BZ_X_
   value is a resume point: GET_BITS plants "case lll:" for its
   label inside the big switch in BZ2_decompress, so the decoder can
   return when input runs dry and later restart exactly where it
   stopped. */
#define BZ_X_IDLE        1
#define BZ_X_OUTPUT      2

#define BZ_X_MAGIC_1     10
#define BZ_X_MAGIC_2     11
#define BZ_X_MAGIC_3     12
#define BZ_X_MAGIC_4     13
#define BZ_X_BLKHDR_1    14
#define BZ_X_BLKHDR_2    15
#define BZ_X_BLKHDR_3    16
#define BZ_X_BLKHDR_4    17
#define BZ_X_BLKHDR_5    18
#define BZ_X_BLKHDR_6    19
#define BZ_X_BCRC_1      20
#define BZ_X_BCRC_2      21
#define BZ_X_BCRC_3      22
#define BZ_X_BCRC_4      23
#define BZ_X_RANDBIT     24
#define BZ_X_ORIGPTR_1   25
#define BZ_X_ORIGPTR_2   26
#define BZ_X_ORIGPTR_3   27
#define BZ_X_MAPPING_1   28
#define BZ_X_MAPPING_2   29
#define BZ_X_SELECTOR_1  30
#define BZ_X_SELECTOR_2  31
#define BZ_X_SELECTOR_3  32
#define BZ_X_CODING_1    33
#define BZ_X_CODING_2    34
#define BZ_X_CODING_3    35
#define BZ_X_MTF_1       36
#define BZ_X_MTF_2       37
#define BZ_X_MTF_3       38
#define BZ_X_MTF_4       39
#define BZ_X_MTF_5       40
#define BZ_X_MTF_6       41
#define BZ_X_ENDHDR_2    42
#define BZ_X_ENDHDR_3    43
#define BZ_X_ENDHDR_4    44
#define BZ_X_ENDHDR_5    45
#define BZ_X_ENDHDR_6    46
#define BZ_X_CCRC_1      47
#define BZ_X_CCRC_2      48
#define BZ_X_CCRC_3      49
#define BZ_X_CCRC_4      50



/*-- Constants for the fast MTF decoder. --*/

/* Cache geometry for the fast MTF decoder (the mtfa / mtfbase
   fields of DState). */
#define MTFA_SIZE 4096
#define MTFL_SIZE 16
+
+
+
+/*-- Structure holding all the decompression-side stuff. --*/
+
/* Whole-stream decompressor state.  The save_* fields at the end
   mirror the local variables of BZ2_decompress, which copies them in
   on entry and writes them back before returning, so decoding can be
   suspended and resumed across calls as input becomes available. */
typedef
   struct {
      /* pointer back to the struct bz_stream */
      bz_stream* strm;

      /* state indicator for this stream */
      Int32    state;

      /* for doing the final run-length decoding */
      UChar    state_out_ch;
      Int32    state_out_len;
      Bool     blockRandomised;
      BZ_RAND_DECLS;

      /* the buffer for bit stream reading */
      UInt32   bsBuff;
      Int32    bsLive;

      /* misc administratium */
      Int32    blockSize100k;
      Bool     smallDecompress;
      Int32    currBlockNo;
      Int32    verbosity;

      /* for undoing the Burrows-Wheeler transform */
      Int32    origPtr;
      UInt32   tPos;
      Int32    k0;
      Int32    unzftab[256];
      Int32    nblock_used;
      Int32    cftab[257];
      Int32    cftabCopy[257];

      /* for undoing the Burrows-Wheeler transform (FAST) */
      UInt32   *tt;

      /* for undoing the Burrows-Wheeler transform (SMALL) */
      UInt16   *ll16;
      UChar    *ll4;

      /* stored and calculated CRCs */
      UInt32   storedBlockCRC;
      UInt32   storedCombinedCRC;
      UInt32   calculatedBlockCRC;
      UInt32   calculatedCombinedCRC;

      /* map of bytes used in block */
      Int32    nInUse;
      Bool     inUse[256];
      Bool     inUse16[16];
      UChar    seqToUnseq[256];

      /* for decoding the MTF values */
      UChar    mtfa   [MTFA_SIZE];
      Int32    mtfbase[256 / MTFL_SIZE];
      UChar    selector   [BZ_MAX_SELECTORS];
      UChar    selectorMtf[BZ_MAX_SELECTORS];
      UChar    len  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];

      Int32    limit  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      Int32    base   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      Int32    perm   [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
      Int32    minLens[BZ_N_GROUPS];

      /* save area for scalars in the main decompress code */
      Int32    save_i;
      Int32    save_j;
      Int32    save_t;
      Int32    save_alphaSize;
      Int32    save_nGroups;
      Int32    save_nSelectors;
      Int32    save_EOB;
      Int32    save_groupNo;
      Int32    save_groupPos;
      Int32    save_nextSym;
      Int32    save_nblockMAX;
      Int32    save_nblock;
      Int32    save_es;
      Int32    save_N;
      Int32    save_curr;
      Int32    save_zt;
      Int32    save_zn; 
      Int32    save_zvec;
      Int32    save_zj;
      Int32    save_gSel;
      Int32    save_gMinlen;
      Int32*   save_gLimit;
      Int32*   save_gBase;
      Int32*   save_gPerm;

   }
   DState;
+
+
+
+/*-- Macros for decompression. --*/
+
/* FAST-mode inverse BWT step: each tt[] entry packs the data byte
   in the low 8 bits and the successor index in the bits above. */
#define BZ_GET_FAST(cccc)                     \
    s->tPos = s->tt[s->tPos];                 \
    cccc = (UChar)(s->tPos & 0xff);           \
    s->tPos >>= 8;

/* Same step, but operating on cached local copies c_tt / c_tPos. */
#define BZ_GET_FAST_C(cccc)                   \
    c_tPos = c_tt[c_tPos];                    \
    cccc = (UChar)(c_tPos & 0xff);            \
    c_tPos >>= 8;

/* SMALL-mode storage: entry i is 20 bits wide, kept as 16 bits in
   ll16[i] plus a 4-bit nibble packed two-per-byte in ll4[]. */
#define SET_LL4(i,n)                                          \
   { if (((i) & 0x1) == 0)                                    \
        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else    \
        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
   }

#define GET_LL4(i)                             \
   ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)

#define SET_LL(i,n)                          \
   { s->ll16[i] = (UInt16)(n & 0x0000ffff);  \
     SET_LL4(i, n >> 16);                    \
   }

#define GET_LL(i) \
   (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16))

/* SMALL-mode inverse BWT step: recover the byte by binary search of
   cftab (BZ2_indexIntoF), then follow the ll16/ll4 link. */
#define BZ_GET_SMALL(cccc)                            \
      cccc = BZ2_indexIntoF ( s->tPos, s->cftab );    \
      s->tPos = GET_LL(s->tPos);
+
+
+/*-- externs for decompression. --*/
+
/* Decompression-side entry points.  BZ2_indexIntoF and
   BZ2_decompress are defined later in this chunk; the Huffman
   decode-table builder is implemented elsewhere. */
extern Int32 
BZ2_indexIntoF ( Int32, Int32* );

extern Int32 
BZ2_decompress ( DState* );

extern void 
BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
                           Int32,  Int32, Int32 );
+
+
+#endif
+
+
+/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
+
+#ifdef BZ_NO_STDIO
+#ifndef NULL
+#define NULL 0
+#endif
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                   bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
+
+
+/* Something which has the same size as void* on the host.  That is,
+   it is 32 bits on a 32-bit host and 64 bits on a 64-bit host, and so
+   it can safely be coerced to and from a pointer type on the host
+   machine. */
typedef  unsigned long HWord;   /* host-pointer-sized (see comment above) */
typedef  char          HChar;
typedef  signed int    Int;
typedef  unsigned int  UInt;

/* 64-bit integer types, independent of the host word size. */
typedef    signed long long int   Long;
typedef  unsigned long long int   ULong;
+
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
/* Service-call hook: all "logging" output leaves via calls through
   this pointer (see vex_log_bytes).  Initialised to 0 here;
   presumably assigned by startup code outside this chunk -- confirm
   against the entry point. */
static HWord (*serviceFn)(HWord,HWord) = 0;
+
/* The following string/memory helpers are compiled out (#if 0) and
   retained for reference only; nothing in the active code may rely
   on them. */
#if 0
static char* my_strcpy ( char* dest, const char* src )
{
   char* dest_orig = dest;
   while (*src) *dest++ = *src++;
   *dest = 0;
   return dest_orig;
}

static void* my_memcpy ( void *dest, const void *src, int sz )
{
   const char *s = (const char *)src;
   char *d = (char *)dest;

   while (sz--)
      *d++ = *s++;

   return dest;
}

static void* my_memmove( void *dst, const void *src, unsigned int len )
{
    register char *d;
    register char *s;
    if ( dst > src ) {
        d = (char *)dst + len - 1;
        s = (char *)src + len - 1;
        while ( len >= 4 ) {
            *d-- = *s--;
            *d-- = *s--;
            *d-- = *s--;
            *d-- = *s--;
            len -= 4;
        }
        while ( len-- ) {
            *d-- = *s--;
        }
    } else if ( dst < src ) {
        d = (char *)dst;
        s = (char *)src;
        while ( len >= 4 ) {
            *d++ = *s++;
            *d++ = *s++;
            *d++ = *s++;
            *d++ = *s++;
            len -= 4;
        }
        while ( len-- ) {
            *d++ = *s++;
        }
    }
    return dst;
}
#endif
+
/* Append the NUL-terminated string SRC onto the end of DEST.
   DEST must already be NUL-terminated and have room for the combined
   string plus terminator.  Returns DEST. */
char* my_strcat ( char* dest, const char* src )
{
   char* ret = dest;
   char* p   = dest;
   while (*p != 0)
      p++;                      /* locate dest's terminating NUL */
   while ((*p = *src) != 0) {   /* copy src, including its NUL */
      p++;
      src++;
   }
   return ret;
}
+
+
+/////////////////////////////////////////////////////////////////////
+
+static void vex_log_bytes ( char* p, int n )
+{
+   int i;
+   for (i = 0; i < n; i++)
+      (*serviceFn)( 1, (int)p[i] );
+}
+
+/*---------------------------------------------------------*/
+/*--- vex_printf                                        ---*/
+/*---------------------------------------------------------*/
+
+/* This should be the only <...> include in the entire VEX library.
+   New code for vex_util.c should go above this point. */
+#include <stdarg.h>
+
+static HChar vex_toupper ( HChar c )
+{
+   if (c >= 'a' && c <= 'z')
+      return c + ('A' - 'a');
+   else
+      return c;
+}
+
+static Int vex_strlen ( const HChar* str )
+{
+   Int i = 0;
+   while (str[i] != 0) i++;
+   return i;
+}
+
+Bool vex_streq ( const HChar* s1, const HChar* s2 )
+{
+   while (True) {
+      if (*s1 == 0 && *s2 == 0)
+         return True;
+      if (*s1 != *s2)
+         return False;
+      s1++;
+      s2++;
+   }
+}
+
+/* Some flags.  */
+#define VG_MSG_SIGNED    1 /* The value is signed. */
+#define VG_MSG_ZJUSTIFY  2 /* Must justify with '0'. */
+#define VG_MSG_LJUSTIFY  4 /* Must justify on the left. */
+#define VG_MSG_PAREN     8 /* Parenthesize if present (for %y) */
+#define VG_MSG_COMMA    16 /* Add commas to numbers (for %d, %u) */
+
+/* Copy a string into the buffer. */
+static UInt
+myvprintf_str ( void(*send)(HChar), Int flags, Int width, HChar* str, 
+                Bool capitalise )
+{
+#  define MAYBE_TOUPPER(ch) (capitalise ? vex_toupper(ch) : (ch))
+   UInt ret = 0;
+   Int i, extra;
+   Int len = vex_strlen(str);
+
+   if (width == 0) {
+      ret += len;
+      for (i = 0; i < len; i++)
+         send(MAYBE_TOUPPER(str[i]));
+      return ret;
+   }
+
+   if (len > width) {
+      ret += width;
+      for (i = 0; i < width; i++)
+         send(MAYBE_TOUPPER(str[i]));
+      return ret;
+   }
+
+   extra = width - len;
+   if (flags & VG_MSG_LJUSTIFY) {
+      ret += extra;
+      for (i = 0; i < extra; i++)
+         send(' ');
+   }
+   ret += len;
+   for (i = 0; i < len; i++)
+      send(MAYBE_TOUPPER(str[i]));
+   if (!(flags & VG_MSG_LJUSTIFY)) {
+      ret += extra;
+      for (i = 0; i < extra; i++)
+         send(' ');
+   }
+
+#  undef MAYBE_TOUPPER
+
+   return ret;
+}
+
+/* Write P into the buffer according to these args:
+ *  If SIGN is true, p is a signed.
+ *  BASE is the base.
+ *  If WITH_ZERO is true, '0' must be added.
+ *  WIDTH is the width of the field.
+ */
/* Render the integer PL in BASE (2..16) through SEND, honouring the
   field WIDTH and the VG_MSG_SIGNED / VG_MSG_ZJUSTIFY /
   VG_MSG_LJUSTIFY / VG_MSG_COMMA flags.  Returns the number of
   characters sent.
   NOTE(review): pL is immediately narrowed to 32 bits
   ("UInt p = (UInt)pL"), so values needing more than 32 bits print
   wrongly even via %lld.  This file plants deliberate bugs for the
   tool under test (see the "XXX re 930" comment below), so this may
   be intentional -- confirm before fixing. */
static UInt
myvprintf_int64 ( void(*send)(HChar), Int flags, Int base, Int width, ULong pL)
{
   HChar buf[40];
   Int   ind = 0;
   Int   i, nc = 0;     /* nc: count of comma separators inserted */
   Bool  neg = False;
   HChar *digits = "0123456789ABCDEF";
   UInt  ret = 0;
   UInt  p = (UInt)pL;

   if (base < 2 || base > 16)
      return ret;
 
   if ((flags & VG_MSG_SIGNED) && (Int)p < 0) {
      p   = - (Int)p;
      neg = True;
   }

   /* Build the digit string in reverse order in buf[]. */
   if (p == 0)
      buf[ind++] = '0';
   else {
      while (p > 0) {
         /* A comma before every group of 3 digits (decimal only). */
         if ((flags & VG_MSG_COMMA) && 10 == base &&
             0 == (ind-nc) % 3 && 0 != ind) 
         {
            buf[ind++] = ',';
            nc++;
         }
         buf[ind++] = digits[p % base];
         p /= base;
      }
   }

   if (neg)
      buf[ind++] = '-';

   /* Right-justified: pad (still reversed, so padding lands on the
      left once emitted) with '0' or ' '. */
   if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) {
      for(; ind < width; ind++) {
	//vassert(ind < 39);
         buf[ind] = ((flags & VG_MSG_ZJUSTIFY) ? '0': ' ');
      }
   }

   /* Reverse copy to buffer.  */
   ret += ind;
   for (i = ind -1; i >= 0; i--) {
      send(buf[i]);
   }
   if (width > 0 && (flags & VG_MSG_LJUSTIFY)) {
      for(; ind < width; ind++) {
	 ret++;
         send(' ');  // Never pad with zeroes on RHS -- changes the value!
      }
   }
   return ret;
}
+
+
+/* A simple vprintf().  */
/* Minimal printf engine: walks FORMAT, emitting literal characters
   through SEND and decoding %-directives against VARGS.  Supports
   %%, %d, %u, %x (each with optional 'l' modifiers), %p, %c and
   %s/%S (S = capitalised), plus the flags '(' ',' '-' '0' and a
   decimal field width.  The flags are only recognised in exactly
   that order.  Unknown conversion characters are silently dropped.
   Returns the number of characters emitted. */
static 
UInt vprintf_wrk ( void(*send)(HChar), const HChar *format, va_list vargs )
{
   UInt ret = 0;
   int i;
   int flags;
   int width;
   Bool is_long;

   /* We assume that vargs has already been initialised by the 
      caller, using va_start, and that the caller will similarly
      clean up with va_end.
   */

   for (i = 0; format[i] != 0; i++) {
      if (format[i] != '%') {
         send(format[i]);
	 ret++;
         continue;
      }
      i++;
      /* A '%' has been found.  Ignore a trailing %. */
      if (format[i] == 0)
         break;
      if (format[i] == '%') {
         /* `%%' is replaced by `%'. */
         send('%');
	 ret++;
         continue;
      }
      /* Parse the (order-sensitive) flags, then the field width,
         then any 'l' length modifiers. */
      flags = 0;
      is_long = False;
      width = 0; /* length of the field. */
      if (format[i] == '(') {
	 flags |= VG_MSG_PAREN;
	 i++;
      }
      /* If ',' follows '%', commas will be inserted. */
      if (format[i] == ',') {
         flags |= VG_MSG_COMMA;
         i++;
      }
      /* If '-' follows '%', justify on the left. */
      if (format[i] == '-') {
         flags |= VG_MSG_LJUSTIFY;
         i++;
      }
      /* If '0' follows '%', pads will be inserted. */
      if (format[i] == '0') {
         flags |= VG_MSG_ZJUSTIFY;
         i++;
      }
      /* Compute the field length. */
      while (format[i] >= '0' && format[i] <= '9') {
         width *= 10;
         width += format[i++] - '0';
      }
      while (format[i] == 'l') {
         i++;
         is_long = True;
      }

      /* Dispatch on the conversion character; each case pulls its
         argument from vargs and renders via the myvprintf_* helpers. */
      switch (format[i]) {
         case 'd': /* %d */
            flags |= VG_MSG_SIGNED;
            if (is_long)
               ret += myvprintf_int64(send, flags, 10, width, 
				      (ULong)(va_arg (vargs, Long)));
            else
               ret += myvprintf_int64(send, flags, 10, width, 
				      (ULong)(va_arg (vargs, Int)));
            break;
         case 'u': /* %u */
            if (is_long)
               ret += myvprintf_int64(send, flags, 10, width, 
				      (ULong)(va_arg (vargs, ULong)));
            else
               ret += myvprintf_int64(send, flags, 10, width, 
				      (ULong)(va_arg (vargs, UInt)));
            break;
         case 'p': /* %p */
	    ret += 2;
            send('0');
            send('x');
            ret += myvprintf_int64(send, flags, 16, width, 
				   (ULong)((HWord)va_arg (vargs, void *)));
            break;
         case 'x': /* %x */
            if (is_long)
               ret += myvprintf_int64(send, flags, 16, width, 
				      (ULong)(va_arg (vargs, ULong)));
            else
               ret += myvprintf_int64(send, flags, 16, width, 
				      (ULong)(va_arg (vargs, UInt)));
            break;
         case 'c': /* %c */
	    ret++;
            send((va_arg (vargs, int)));
            break;
         case 's': case 'S': { /* %s */
            char *str = va_arg (vargs, char *);
            if (str == (char*) 0) str = "(null)";
            ret += myvprintf_str(send, flags, width, str, 
                                 (format[i]=='S'));
            break;
	 }
#        if 0
	 case 'y': { /* %y - print symbol */
	    Char buf[100];
	    Char *cp = buf;
	    Addr a = va_arg(vargs, Addr);

	    if (flags & VG_MSG_PAREN)
	       *cp++ = '(';
	    if (VG_(get_fnname_w_offset)(a, cp, sizeof(buf)-4)) {
	       if (flags & VG_MSG_PAREN) {
		  cp += VG_(strlen)(cp);
		  *cp++ = ')';
		  *cp = '\0';
	       }
	       ret += myvprintf_str(send, flags, width, buf, 0);
	    }
	    break;
	 }
#        endif
         default:
            break;
      }
   }
   return ret;
}
+
+
+/* A general replacement for printf().  Note that only low-level 
+   debugging info should be sent via here.  The official route is to
+   to use vg_message().  This interface is deprecated.
+*/
+/* XXX re 930: make the buffer just to small (by 1 byte) to be OK
+   for this particular run. */
/* Deliberately undersized (70 bytes) per the XXX note above; the
   flush logic in add_to_myprintf_buf still checks against the
   nominal 1000. */
static HChar myprintf_buf[1000   -930];
static Int   n_myprintf_buf;   /* chars currently buffered */
+
/* Accumulate one character into myprintf_buf, keeping it
   NUL-terminated; flush (before storing c) via vex_log_bytes when a
   newline arrives or the buffer looks nearly "full".
   NOTE(review): the fullness guard assumes a 1000-byte buffer while
   the array is only 70 bytes, so long lines overrun it -- that
   appears to be the deliberately planted bug described above; do
   not "fix" without checking the test's intent. */
static void add_to_myprintf_buf ( HChar c )
{
   if (c == '\n' || n_myprintf_buf >= 1000-10 /*paranoia*/ ) {
      (*vex_log_bytes)( myprintf_buf, vex_strlen(myprintf_buf) );
      n_myprintf_buf = 0;
      myprintf_buf[n_myprintf_buf] = 0;      
   }
   myprintf_buf[n_myprintf_buf++] = c;
   myprintf_buf[n_myprintf_buf] = 0;
}
+
/* printf-style front end: formats via vprintf_wrk, staging output in
   the static myprintf_buf (so this is not reentrant) and flushing
   any residue through vex_log_bytes.  Returns the number of
   characters formatted. */
static UInt vex_printf ( const char *format, ... )
{
   UInt ret;
   va_list vargs;
   va_start(vargs,format);
   
   /* Reset the staging buffer before formatting. */
   n_myprintf_buf = 0;
   myprintf_buf[n_myprintf_buf] = 0;      
   ret = vprintf_wrk ( add_to_myprintf_buf, format, vargs );

   /* Flush whatever the final (newline-less) line left behind. */
   if (n_myprintf_buf > 0) {
      (*vex_log_bytes)( myprintf_buf, n_myprintf_buf );
   }

   va_end(vargs);

   return ret;
}
+
+/*---------------------------------------------------------------*/
+/*--- end                                          vex_util.c ---*/
+/*---------------------------------------------------------------*/
+
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
+
+/*-------------------------------------------------------------*/
+/*--- Decompression machinery                               ---*/
+/*---                                          decompress.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+
+
+/*---------------------------------------------------*/
+static
+void makeMaps_d ( DState* s )
+{
+   Int32 i;
+   s->nInUse = 0;
+   for (i = 0; i < 256; i++)
+      if (s->inUse[i]) {
+         s->seqToUnseq[s->nInUse] = i;
+         s->nInUse++;
+      }
+}
+
+
+/*---------------------------------------------------*/
/* Record the result code and jump to the save/restore epilogue of
   BZ2_decompress.  (These macros are only meaningful inside that
   function's big switch.) */
#define RETURN(rrr)                               \
   { retVal = rrr; goto save_state_and_return; };

/* Read nnn bits of the stream into vvv.  Expands to a "case lll:"
   resume label: if input runs out it RETURNs BZ_OK with
   s->state == lll, and the next call's switch re-enters right here.
   Also maintains the 64-bit (lo32/hi32) total_in counter. */
#define GET_BITS(lll,vvv,nnn)                     \
   case lll: s->state = lll;                      \
   while (True) {                                 \
      if (s->bsLive >= nnn) {                     \
         UInt32 v;                                \
         v = (s->bsBuff >>                        \
             (s->bsLive-nnn)) & ((1 << nnn)-1);   \
         s->bsLive -= nnn;                        \
         vvv = v;                                 \
         break;                                   \
      }                                           \
      if (s->strm->avail_in == 0) RETURN(BZ_OK);  \
      s->bsBuff                                   \
         = (s->bsBuff << 8) |                     \
           ((UInt32)                              \
              (*((UChar*)(s->strm->next_in))));   \
      s->bsLive += 8;                             \
      s->strm->next_in++;                         \
      s->strm->avail_in--;                        \
      s->strm->total_in_lo32++;                   \
      if (s->strm->total_in_lo32 == 0)            \
         s->strm->total_in_hi32++;                \
   }

#define GET_UCHAR(lll,uuu)                        \
   GET_BITS(lll,uuu,8)

#define GET_BIT(lll,uuu)                          \
   GET_BITS(lll,uuu,1)

/*---------------------------------------------------*/
/* Decode one Huffman-coded MTF symbol into lval, re-selecting the
   coding group every BZ_G_SIZE symbols as dictated by selector[].
   Needs two resume labels because it reads the bit stream in two
   places.  Bad data (over-long code, out-of-range symbol index)
   RETURNs BZ_DATA_ERROR. */
#define GET_MTF_VAL(label1,label2,lval)           \
{                                                 \
   if (groupPos == 0) {                           \
      groupNo++;                                  \
      if (groupNo >= nSelectors)                  \
         RETURN(BZ_DATA_ERROR);                   \
      groupPos = BZ_G_SIZE;                       \
      gSel = s->selector[groupNo];                \
      gMinlen = s->minLens[gSel];                 \
      gLimit = &(s->limit[gSel][0]);              \
      gPerm = &(s->perm[gSel][0]);                \
      gBase = &(s->base[gSel][0]);                \
   }                                              \
   groupPos--;                                    \
   zn = gMinlen;                                  \
   GET_BITS(label1, zvec, zn);                    \
   while (1) {                                    \
      if (zn > 20 /* the longest code */)         \
         RETURN(BZ_DATA_ERROR);                   \
      if (zvec <= gLimit[zn]) break;              \
      zn++;                                       \
      GET_BIT(label2, zj);                        \
      zvec = (zvec << 1) | zj;                    \
   };                                             \
   if (zvec - gBase[zn] < 0                       \
       || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE)  \
      RETURN(BZ_DATA_ERROR);                      \
   lval = gPerm[zvec - gBase[zn]];                \
}
+
+
+
+/*---------------------------------------------------*/
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
+{
+   Int32 nb, na, mid;
+   nb = 0;
+   na = 256;
+   do {
+      mid = (nb + na) >> 1;
+      if (indx >= cftab[mid]) nb = mid; else na = mid;
+   }
+   while (na - nb != 1);
+   return nb;
+}
+
+/*---------------------------------------------------*/
+Int32 BZ2_decompress ( DState* s )
+{
+   UChar      uc;
+   Int32      retVal;
+   Int32      minLen, maxLen;
+   bz_stream* strm = s->strm;
+
+   /* stuff that needs to be saved/restored */
+   Int32  i;
+   Int32  j;
+   Int32  t;
+   Int32  alphaSize;
+   Int32  nGroups;
+   Int32  nSelectors;
+   Int32  EOB;
+   Int32  groupNo;
+   Int32  groupPos;
+   Int32  nextSym;
+   Int32  nblockMAX;
+   Int32  nblock;
+   Int32  es;
+   Int32  N;
+   Int32  curr;
+   Int32  zt;
+   Int32  zn; 
+   Int32  zvec;
+   Int32  zj;
+   Int32  gSel;
+   Int32  gMinlen;
+   Int32* gLimit;
+   Int32* gBase;
+   Int32* gPerm;
+
+   if (s->state == BZ_X_MAGIC_1) {
+      /*initialise the save area*/
+      s->save_i           = 0;
+      s->save_j           = 0;
+      s->save_t           = 0;
+      s->save_alphaSize   = 0;
+      s->save_nGroups     = 0;
+      s->save_nSelectors  = 0;
+      s->save_EOB         = 0;
+      s->save_groupNo     = 0;
+      s->save_groupPos    = 0;
+      s->save_nextSym     = 0;
+      s->save_nblockMAX   = 0;
+      s->save_nblock      = 0;
+      s->save_es          = 0;
+      s->save_N           = 0;
+      s->save_curr        = 0;
+      s->save_zt          = 0;
+      s->save_zn          = 0;
+      s->save_zvec        = 0;
+      s->save_zj          = 0;
+      s->save_gSel        = 0;
+      s->save_gMinlen     = 0;
+      s->save_gLimit      = NULL;
+      s->save_gBase       = NULL;
+      s->save_gPerm       = NULL;
+   }
+
+   /*restore from the save area*/
+   i           = s->save_i;
+   j           = s->save_j;
+   t           = s->save_t;
+   alphaSize   = s->save_alphaSize;
+   nGroups     = s->save_nGroups;
+   nSelectors  = s->save_nSelectors;
+   EOB         = s->save_EOB;
+   groupNo     = s->save_groupNo;
+   groupPos    = s->save_groupPos;
+   nextSym     = s->save_nextSym;
+   nblockMAX   = s->save_nblockMAX;
+   nblock      = s->save_nblock;
+   es          = s->save_es;
+   N           = s->save_N;
+   curr        = s->save_curr;
+   zt          = s->save_zt;
+   zn          = s->save_zn; 
+   zvec        = s->save_zvec;
+   zj          = s->save_zj;
+   gSel        = s->save_gSel;
+   gMinlen     = s->save_gMinlen;
+   gLimit      = s->save_gLimit;
+   gBase       = s->save_gBase;
+   gPerm       = s->save_gPerm;
+
+   retVal = BZ_OK;
+
+   switch (s->state) {
+
+      GET_UCHAR(BZ_X_MAGIC_1, uc);
+      if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_2, uc);
+      if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_3, uc)
+      if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
+      if (s->blockSize100k < (BZ_HDR_0 + 1) || 
+          s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+      s->blockSize100k -= BZ_HDR_0;
+
+      if (s->smallDecompress) {
+         s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+         s->ll4  = BZALLOC( 
+                      ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 
+                   );
+         if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+      } else {
+         s->tt  = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+         if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+      }
+
+      GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+      if (uc == 0x17) goto endhdr_2;
+      if (uc != 0x31) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_2, uc);
+      if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_3, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_4, uc);
+      if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_5, uc);
+      if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_6, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+      s->currBlockNo++;
+      if (s->verbosity >= 2)
+         VPrintf1 ( "\n    [%d: huff+mtf ", s->currBlockNo );
+ 
+      s->storedBlockCRC = 0;
+      GET_UCHAR(BZ_X_BCRC_1, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_2, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_3, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_4, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+      GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+
+      s->origPtr = 0;
+      GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+      if (s->origPtr < 0)
+         RETURN(BZ_DATA_ERROR);
+      if (s->origPtr > 10 + 100000*s->blockSize100k) 
+         RETURN(BZ_DATA_ERROR);
+
+      /*--- Receive the mapping table ---*/
+      for (i = 0; i < 16; i++) {
+         GET_BIT(BZ_X_MAPPING_1, uc);
+         if (uc == 1) 
+            s->inUse16[i] = True; else 
+            s->inUse16[i] = False;
+      }
+
+      for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+      for (i = 0; i < 16; i++)
+         if (s->inUse16[i])
+            for (j = 0; j < 16; j++) {
+               GET_BIT(BZ_X_MAPPING_2, uc);
+               if (uc == 1) s->inUse[i * 16 + j] = True;
+            }
+      makeMaps_d ( s );
+      if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+      alphaSize = s->nInUse+2;
+
+      /*--- Now the selectors ---*/
+      GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+      if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
+      GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+      if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
+      for (i = 0; i < nSelectors; i++) {
+         j = 0;
+         while (True) {
+            GET_BIT(BZ_X_SELECTOR_3, uc);
+            if (uc == 0) break;
+            j++;
+            if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+         }
+         s->selectorMtf[i] = j;
+      }
+
+      /*--- Undo the MTF values for the selectors. ---*/
+      {
+         UChar pos[BZ_N_GROUPS], tmp, v;
+         for (v = 0; v < nGroups; v++) pos[v] = v;
+   
+         for (i = 0; i < nSelectors; i++) {
+            v = s->selectorMtf[i];
+            tmp = pos[v];
+            while (v > 0) { pos[v] = pos[v-1]; v--; }
+            pos[0] = tmp;
+            s->selector[i] = tmp;
+         }
+      }
+
+      /*--- Now the coding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         GET_BITS(BZ_X_CODING_1, curr, 5);
+         for (i = 0; i < alphaSize; i++) {
+            while (True) {
+               if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+               GET_BIT(BZ_X_CODING_2, uc);
+               if (uc == 0) break;
+               GET_BIT(BZ_X_CODING_3, uc);
+               if (uc == 0) curr++; else curr--;
+            }
+            s->len[t][i] = curr;
+         }
+      }
+
+      /*--- Create the Huffman decoding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         minLen = 32;
+         maxLen = 0;
+         for (i = 0; i < alphaSize; i++) {
+            if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+            if (s->len[t][i] < minLen) minLen = s->len[t][i];
+         }
+         BZ2_hbCreateDecodeTables ( 
+            &(s->limit[t][0]), 
+            &(s->base[t][0]), 
+            &(s->perm[t][0]), 
+            &(s->len[t][0]),
+            minLen, maxLen, alphaSize
+         );
+         s->minLens[t] = minLen;
+      }
+
+      /*--- Now the MTF values ---*/
+
+      EOB      = s->nInUse+1;
+      nblockMAX = 100000 * s->blockSize100k;
+      groupNo  = -1;
+      groupPos = 0;
+
+      for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+      /*-- MTF init --*/
+      {
+         Int32 ii, jj, kk;
+         kk = MTFA_SIZE-1;
+         for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+            for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+               s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+               kk--;
+            }
+            s->mtfbase[ii] = kk + 1;
+         }
+      }
+      /*-- end MTF init --*/
+
+      nblock = 0;
+      GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+      while (True) {
+
+         if (nextSym == EOB) break;
+
+         if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+            es = -1;
+            N = 1;
+            do {
+               if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+               if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+               N = N * 2;
+               GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+            }
+               while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+            es++;
+            uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
+            s->unzftab[uc] += es;
+
+            if (s->smallDecompress)
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->ll16[nblock] = (UInt16)uc;
+                  nblock++;
+                  es--;
+               }
+            else
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->tt[nblock] = (UInt32)uc;
+                  nblock++;
+                  es--;
+               };
+
+            continue;
+
+         } else {
+
+            if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+            /*-- uc = MTF ( nextSym-1 ) --*/
+            {
+               Int32 ii, jj, kk, pp, lno, off;
+               UInt32 nn;
+               nn = (UInt32)(nextSym - 1);
+
+               if (nn < MTFL_SIZE) {
+                  /* avoid general-case expense */
+                  pp = s->mtfbase[0];
+                  uc = s->mtfa[pp+nn];
+                  while (nn > 3) {
+                     Int32 z = pp+nn;
+                     s->mtfa[(z)  ] = s->mtfa[(z)-1];
+                     s->mtfa[(z)-1] = s->mtfa[(z)-2];
+                     s->mtfa[(z)-2] = s->mtfa[(z)-3];
+                     s->mtfa[(z)-3] = s->mtfa[(z)-4];
+                     nn -= 4;
+                  }
+                  while (nn > 0) { 
+                     s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 
+                  };
+                  s->mtfa[pp] = uc;
+               } else { 
+                  /* general case */
+                  lno = nn / MTFL_SIZE;
+                  off = nn % MTFL_SIZE;
+                  pp = s->mtfbase[lno] + off;
+                  uc = s->mtfa[pp];
+                  while (pp > s->mtfbase[lno]) { 
+                     s->mtfa[pp] = s->mtfa[pp-1]; pp--; 
+                  };
+                  s->mtfbase[lno]++;
+                  while (lno > 0) {
+                     s->mtfbase[lno]--;
+                     s->mtfa[s->mtfbase[lno]] 
+                        = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+                     lno--;
+                  }
+                  s->mtfbase[0]--;
+                  s->mtfa[s->mtfbase[0]] = uc;
+                  if (s->mtfbase[0] == 0) {
+                     kk = MTFA_SIZE-1;
+                     for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+                        for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+                           s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+                           kk--;
+                        }
+                        s->mtfbase[ii] = kk + 1;
+                     }
+                  }
+               }
+            }
+            /*-- end uc = MTF ( nextSym-1 ) --*/
+
+            s->unzftab[s->seqToUnseq[uc]]++;
+            if (s->smallDecompress)
+               s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+               s->tt[nblock]   = (UInt32)(s->seqToUnseq[uc]);
+            nblock++;
+
+            GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+            continue;
+         }
+      }
+
+      /* Now we know what nblock is, we can do a better sanity
+         check on s->origPtr.
+      */
+      if (s->origPtr < 0 || s->origPtr >= nblock)
+         RETURN(BZ_DATA_ERROR);
+
+      /*-- Set up cftab to facilitate generation of T^(-1) --*/
+      s->cftab[0] = 0;
+      for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+      for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
+      for (i = 0; i <= 256; i++) {
+         if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+            /* s->cftab[i] can legitimately be == nblock */
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+
+      s->state_out_len = 0;
+      s->state_out_ch  = 0;
+      BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
+      s->state = BZ_X_OUTPUT;
+      if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+      if (s->smallDecompress) {
+
+         /*-- Make a copy of cftab, used in generation of T --*/
+         for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+         /*-- compute the T vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->ll16[i]);
+            SET_LL(i, s->cftabCopy[uc]);
+            s->cftabCopy[uc]++;
+         }
+
+         /*-- Compute T^(-1) by pointer reversal on T --*/
+         i = s->origPtr;
+         j = GET_LL(i);
+         do {
+            Int32 tmp = GET_LL(j);
+            SET_LL(j, i);
+            i = j;
+            j = tmp;
+         }
+            while (i != s->origPtr);
+
+         s->tPos = s->origPtr;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+         } else {
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+         }
+
+      } else {
+
+         /*-- compute the T^(-1) vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->tt[i] & 0xff);
+            s->tt[s->cftab[uc]] |= (i << 8);
+            s->cftab[uc]++;
+         }
+
+         s->tPos = s->tt[s->origPtr] >> 8;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 
+         } else {
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+         }
+
+      }
+
+      RETURN(BZ_OK);
+
+
+
+    endhdr_2:
+
+      GET_UCHAR(BZ_X_ENDHDR_2, uc);
+      if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_3, uc);
+      if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_4, uc);
+      if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_5, uc);
+      if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_6, uc);
+      if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+      s->storedCombinedCRC = 0;
+      GET_UCHAR(BZ_X_CCRC_1, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_2, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_3, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_4, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+
+      s->state = BZ_X_IDLE;
+      RETURN(BZ_STREAM_END);
+
+      default: AssertH ( False, 4001 );
+   }
+
+   AssertH ( False, 4002 );
+
+   save_state_and_return:
+
+   s->save_i           = i;
+   s->save_j           = j;
+   s->save_t           = t;
+   s->save_alphaSize   = alphaSize;
+   s->save_nGroups     = nGroups;
+   s->save_nSelectors  = nSelectors;
+   s->save_EOB         = EOB;
+   s->save_groupNo     = groupNo;
+   s->save_groupPos    = groupPos;
+   s->save_nextSym     = nextSym;
+   s->save_nblockMAX   = nblockMAX;
+   s->save_nblock      = nblock;
+   s->save_es          = es;
+   s->save_N           = N;
+   s->save_curr        = curr;
+   s->save_zt          = zt;
+   s->save_zn          = zn;
+   s->save_zvec        = zvec;
+   s->save_zj          = zj;
+   s->save_gSel        = gSel;
+   s->save_gMinlen     = gMinlen;
+   s->save_gLimit      = gLimit;
+   s->save_gBase       = gBase;
+   s->save_gPerm       = gPerm;
+
+   return retVal;   
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                      decompress.c ---*/
+/*-------------------------------------------------------------*/
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery                               ---*/
+/*---                                           blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+
+  To get some idea how the block sorting algorithms in this file 
+  work, read my paper 
+     On the Performance of BWT Sorting Algorithms
+  in Proceedings of the IEEE Data Compression Conference 2000,
+  Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
+  file implements the algorithm called  cache  in the paper.
+--*/
+
+
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting        ---*/
+/*--- algorithm, for repetitive blocks      ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+/* Insertion-sort fmap[lo..hi] (inclusive) into ascending order of
+   eclass[fmap[.]].  Two passes: a gap-4 insertion pass followed by a
+   gap-1 pass -- a tiny Shell sort, used by fallbackQSort3 for ranges
+   below its small threshold.  Only fmap is permuted; eclass is
+   read-only here. */
+static 
+__inline__
+void fallbackSimpleSort ( UInt32* fmap, 
+                          UInt32* eclass, 
+                          Int32   lo, 
+                          Int32   hi )
+{
+   Int32 i, j, tmp;
+   UInt32 ec_tmp;
+
+   if (lo == hi) return;
+
+   /* Gap-4 pre-pass, skipped for very small ranges. */
+   if (hi - lo > 3) {
+      for ( i = hi-4; i >= lo; i-- ) {
+         tmp = fmap[i];
+         ec_tmp = eclass[tmp];
+         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+            fmap[j-4] = fmap[j];
+         fmap[j-4] = tmp;
+      }
+   }
+
+   /* Final gap-1 insertion sort completes the ordering. */
+   for ( i = hi-1; i >= lo; i-- ) {
+      tmp = fmap[i];
+      ec_tmp = eclass[tmp];
+      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+         fmap[j-1] = fmap[j];
+      fmap[j-1] = tmp;
+   }
+}
+
+
+/*---------------------------------------------*/
+/* Exchange two Int32 lvalues. */
+#define fswap(zz1, zz2) \
+   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+/* Exchange the zzn-element runs fmap[zzp1..] and fmap[zzp2..].
+   Expands in a scope where `fmap` is in scope (fallbackQSort3). */
+#define fvswap(zzp1, zzp2, zzn)       \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      fswap(fmap[yyp1], fmap[yyp2]);  \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+
+
+/* NB: the result is not parenthesised -- beware when embedding in a
+   larger expression. */
+#define fmin(a,b) ((a) < (b)) ? (a) : (b)
+
+/* Push/pop a (lo,hi) work item on fallbackQSort3's explicit stack
+   (stackLo/stackHi/sp are locals of that function). */
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+                       stackHi[sp] = hz; \
+                       sp++; }
+
+#define fpop(lz,hz) { sp--;              \
+                      lz = stackLo[sp];  \
+                      hz = stackHi[sp]; }
+
+/* Ranges smaller than this are handed to fallbackSimpleSort. */
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE   100
+
+
+/* Iterative 3-way-partition quicksort of fmap[loSt..hiSt] by eclass
+   value, using an explicit stack (no recursion) and a cheap
+   pseudo-random pivot choice.  Ranges below
+   FALLBACK_QSORT_SMALL_THRESH go to fallbackSimpleSort.
+   Partitioning collects keys equal to the pivot at both ends of the
+   range and swaps them into the middle afterwards (Bentley/Sedgewick
+   "fat pivot" scheme), so equal runs are never re-partitioned. */
+static
+void fallbackQSort3 ( UInt32* fmap, 
+                      UInt32* eclass,
+                      Int32   loSt, 
+                      Int32   hiSt )
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m;
+   Int32 sp, lo, hi;
+   UInt32 med, r, r3;
+   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+   r = 0;
+
+   sp = 0;
+   fpush ( loSt, hiSt );
+
+   while (sp > 0) {
+
+      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
+
+      fpop ( lo, hi );
+      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+         fallbackSimpleSort ( fmap, eclass, lo, hi );
+         continue;
+      }
+
+      /* Random partitioning.  Median of 3 sometimes fails to
+         avoid bad cases.  Median of 9 seems to help but 
+         looks rather expensive.  This too seems to work but
+         is cheaper.  Guidance for the magic constants 
+         7621 and 32768 is taken from Sedgewick's algorithms
+         book, chapter 35.
+      */
+      r = ((r * 7621) + 1) % 32768;
+      r3 = r % 3;
+      if (r3 == 0) med = eclass[fmap[lo]]; else
+      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+                   med = eclass[fmap[hi]];
+
+      /* [lo..ltLo) and (gtHi..hi] accumulate keys == med;
+         [unLo..unHi] is the still-unexamined middle. */
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (1) {
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unLo], fmap[ltLo]); 
+               ltLo++; unLo++; 
+               continue; 
+            };
+            if (n > 0) break;
+            unLo++;
+         }
+         while (1) {
+            if (unLo > unHi) break;
+            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+            if (n == 0) { 
+               fswap(fmap[unHi], fmap[gtHi]); 
+               gtHi--; unHi--; 
+               continue; 
+            };
+            if (n < 0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+
+      /* Whole range was equal to the pivot: nothing left to sort. */
+      if (gtHi < ltLo) continue;
+
+      /* Move the equal-to-pivot blocks from the ends into the middle. */
+      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+
+      /* Push the larger subrange first so the smaller is handled
+         next, bounding stack growth. */
+      if (n - lo > hi - m) {
+         fpush ( lo, n );
+         fpush ( m, hi );
+      } else {
+         fpush ( m, hi );
+         fpush ( lo, n );
+      }
+   }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      eclass exists for [0 .. nblock-1]
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      ptr exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)eclass) [0 .. nblock-1] holds block
+      All other areas of eclass destroyed
+      fmap [0 .. nblock-1] holds sorted order
+      bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
+/* bhtab is a bit vector of "bucket header" bits, one bit per block
+   position (32 per word): set/clear/test bit zz, fetch the whole
+   32-bit word containing it, or test whether zz is mid-word. */
+#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
+#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
+#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
+#define      WORD_BH(zz)  bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
+
+/* Fallback suffix sort for highly repetitive blocks, O(N log(N)^2).
+   An "exponential radix sort": a 1-byte bucket sort seeds fmap and
+   the bucket-header bits, then each pass doubles the compared prefix
+   length H, using fallbackQSort3 on each still-unresolved bucket.
+   See the Pre/Post contract in the comment above. */
+static
+void fallbackSort ( UInt32* fmap, 
+                    UInt32* eclass, 
+                    UInt32* bhtab,
+                    Int32   nblock,
+                    Int32   verb )
+{
+   Int32 ftab[257];
+   Int32 ftabCopy[256];
+   Int32 H, i, j, k, l, r, cc, cc1;
+   Int32 nNotDone;
+   Int32 nBhtab;
+   UChar* eclass8 = (UChar*)eclass;   /* byte view of the block */
+
+   /*--
+      Initial 1-char radix sort to generate
+      initial fmap and initial BH bits.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        bucket sorting ...\n" );
+   for (i = 0; i < 257;    i++) ftab[i] = 0;
+   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
+   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
+
+   /* Place each position into its byte bucket (counting sort). */
+   for (i = 0; i < nblock; i++) {
+      j = eclass8[i];
+      k = ftab[j] - 1;
+      ftab[j] = k;
+      fmap[k] = i;
+   }
+
+   /* Mark the start of every initial bucket in the BH bit vector. */
+   nBhtab = 2 + (nblock / 32);
+   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+   /*--
+      Inductively refine the buckets.  Kind-of an
+      "exponential radix sort" (!), inspired by the
+      Manber-Myers suffix array construction algorithm.
+   --*/
+
+   /*-- set sentinel bits for block-end detection --*/
+   for (i = 0; i < 32; i++) { 
+      SET_BH(nblock + 2*i);
+      CLEAR_BH(nblock + 2*i + 1);
+   }
+
+   /*-- the log(N) loop --*/
+   H = 1;
+   while (1) {
+
+      if (verb >= 4) 
+         VPrintf1 ( "        depth %6d has ", H );
+
+      /* eclass[k] := start index of the bucket containing position
+         k+H (mod nblock), i.e. the rank of the H-byte suffix. */
+      j = 0;
+      for (i = 0; i < nblock; i++) {
+         if (ISSET_BH(i)) j = i;
+         k = fmap[i] - H; if (k < 0) k += nblock;
+         eclass[k] = j;
+      }
+
+      nNotDone = 0;
+      r = -1;
+      while (1) {
+
+	 /*-- find the next non-singleton bucket --*/
+         k = r + 1;
+         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (ISSET_BH(k)) {
+            /* skip whole words of set bits at a time */
+            while (WORD_BH(k) == 0xffffffff) k += 32;
+            while (ISSET_BH(k)) k++;
+         }
+         l = k - 1;
+         if (l >= nblock) break;
+         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+         if (!ISSET_BH(k)) {
+            while (WORD_BH(k) == 0x00000000) k += 32;
+            while (!ISSET_BH(k)) k++;
+         }
+         r = k - 1;
+         if (r >= nblock) break;
+
+         /*-- now [l, r] bracket current bucket --*/
+         if (r > l) {
+            nNotDone += (r - l + 1);
+            fallbackQSort3 ( fmap, eclass, l, r );
+
+            /*-- scan bucket and generate header bits-- */
+            cc = -1;
+            for (i = l; i <= r; i++) {
+               cc1 = eclass[fmap[i]];
+               if (cc != cc1) { SET_BH(i); cc = cc1; };
+            }
+         }
+      }
+
+      if (verb >= 4) 
+         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+
+      H *= 2;
+      if (H > nblock || nNotDone == 0) break;
+   }
+
+   /*-- 
+      Reconstruct the original block in
+      eclass8 [0 .. nblock-1], since the
+      previous phase destroyed it.
+   --*/
+   if (verb >= 4)
+      VPrintf0 ( "        reconstructing block ...\n" );
+   j = 0;
+   for (i = 0; i < nblock; i++) {
+      while (ftabCopy[j] == 0) j++;
+      ftabCopy[j]--;
+      eclass8[fmap[i]] = (UChar)j;
+   }
+   AssertH ( j < 256, 1005 );
+}
+
+#undef       SET_BH
+#undef     CLEAR_BH
+#undef     ISSET_BH
+#undef      WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting       ---*/
+/*--- algorithm.  Faster for "normal"       ---*/
+/*--- non-repetitive blocks.                ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+/* Compare the rotations of block[] starting at i1 and i2: returns
+   True iff rotation i1 sorts strictly after rotation i2.  The first
+   12 bytes are compared directly (hand-unrolled); after that, bytes
+   and the corresponding quadrant[] values are compared 8 positions
+   at a time, with indices wrapped mod nblock.  *budget is
+   decremented once per 8-position step so the caller can detect and
+   abandon an overly expensive sort.  If nblock+8 positions pass with
+   no difference, the rotations are deemed equal and False returned.
+   Pre: i1 != i2. */
+static
+__inline__
+Bool mainGtU ( UInt32  i1, 
+               UInt32  i2,
+               UChar*  block, 
+               UInt16* quadrant,
+               UInt32  nblock,
+               Int32*  budget )
+{
+   Int32  k;
+   UChar  c1, c2;
+   UInt16 s1, s2;
+
+   AssertD ( i1 != i2, "mainGtU" );
+   /* 1 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 2 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 3 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 4 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 5 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 6 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 7 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 8 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 9 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 10 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 11 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+   /* 12 */
+   c1 = block[i1]; c2 = block[i2];
+   if (c1 != c2) return (c1 > c2);
+   i1++; i2++;
+
+   /* Remaining positions to examine before declaring equality. */
+   k = nblock + 8;
+
+   do {
+      /* 1 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 2 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 3 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 4 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 5 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 6 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 7 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+      /* 8 */
+      c1 = block[i1]; c2 = block[i2];
+      if (c1 != c2) return (c1 > c2);
+      s1 = quadrant[i1]; s2 = quadrant[i2];
+      if (s1 != s2) return (s1 > s2);
+      i1++; i2++;
+
+      /* Wrap around the (circularly extended) block. */
+      if (i1 >= nblock) i1 -= nblock;
+      if (i2 >= nblock) i2 -= nblock;
+
+      k -= 8;
+      (*budget)--;
+   }
+      while (k >= 0);
+
+   return False;
+}
+
+
+/*---------------------------------------------*/
+/*--
+   Knuth's increments seem to work better
+   than Incerpi-Sedgewick here.  Possibly
+   because the number of elems to sort is
+   usually small, typically <= 20.
+--*/
+/* Shell sort gap sequence h(k+1) = 3*h(k) + 1 (Knuth's increments),
+   consumed by mainSimpleSort. */
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                   9841, 29524, 88573, 265720,
+                   797161, 2391484 };
+
+/* Shell sort ptr[lo..hi] using the `incs` gap table.  Keys are block
+   rotations, compared at byte offset d via mainGtU (which also
+   charges *budget).  The inner insertion step is unrolled three
+   times; after each group of three the sort returns early if the
+   budget has gone negative, leaving the range partially sorted. */
+static
+void mainSimpleSort ( UInt32* ptr,
+                      UChar*  block,
+                      UInt16* quadrant,
+                      Int32   nblock,
+                      Int32   lo, 
+                      Int32   hi, 
+                      Int32   d,
+                      Int32*  budget )
+{
+   Int32 i, j, h, bigN, hp;
+   UInt32 v;
+
+   bigN = hi - lo + 1;
+   if (bigN < 2) return;
+
+   /* Find the largest increment smaller than the range size. */
+   hp = 0;
+   while (incs[hp] < bigN) hp++;
+   hp--;
+
+   for (; hp >= 0; hp--) {
+      h = incs[hp];
+
+      i = lo + h;
+      while (True) {
+
+         /*-- copy 1 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 2 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /*-- copy 3 --*/
+         if (i > hi) break;
+         v = ptr[i];
+         j = i;
+         while ( mainGtU ( 
+                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
+                 ) ) {
+            ptr[j] = ptr[j-h];
+            j = j - h;
+            if (j <= (lo + h - 1)) break;
+         }
+         ptr[j] = v;
+         i++;
+
+         /* Abandon the sort if the comparison budget is exhausted. */
+         if (*budget < 0) return;
+      }
+   }
+}
+
+
+/*---------------------------------------------*/
+/*--
+   The following is an implementation of
+   an elegant 3-way quicksort for strings,
+   described in a paper "Fast Algorithms for
+   Sorting and Searching Strings", by Robert
+   Sedgewick and Jon L. Bentley.
+--*/
+
+/* Exchange two Int32 lvalues. */
+#define mswap(zz1, zz2) \
+   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+/* Exchange the zzn-element runs ptr[zzp1..] and ptr[zzp2..].
+   Expands in a scope where `ptr` is in scope (mainQSort3). */
+#define mvswap(zzp1, zzp2, zzn)       \
+{                                     \
+   Int32 yyp1 = (zzp1);               \
+   Int32 yyp2 = (zzp2);               \
+   Int32 yyn  = (zzn);                \
+   while (yyn > 0) {                  \
+      mswap(ptr[yyp1], ptr[yyp2]);    \
+      yyp1++; yyp2++; yyn--;          \
+   }                                  \
+}
+
+/* Median of the three byte values a, b, c.  Arguments are passed by
+   value, so the local reordering does not affect the caller. */
+static 
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+   UChar t;
+   if (a > b) { t = a; a = b; b = t; };
+   if (b > c) { 
+      b = c;
+      if (a > b) b = a;
+   }
+   return b;
+}
+
+/* NB: the result is not parenthesised -- beware when embedding in a
+   larger expression. */
+#define mmin(a,b) ((a) < (b)) ? (a) : (b)
+
+/* Push/pop a (lo,hi,depth) work item on mainQSort3's explicit stack
+   (stackLo/stackHi/stackD/sp are locals of that function). */
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+                          stackHi[sp] = hz; \
+                          stackD [sp] = dz; \
+                          sp++; }
+
+#define mpop(lz,hz,dz) { sp--;             \
+                         lz = stackLo[sp]; \
+                         hz = stackHi[sp]; \
+                         dz = stackD [sp]; }
+
+
+/* Number of elements in pending subrange az (az in 0..2). */
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+
+/* Exchange the (lo,hi,depth) descriptors of subranges az and bz. */
+#define mnextswap(az,bz)                                        \
+   { Int32 tz;                                                  \
+     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+/* Small/shallow-range cutoffs for mainQSort3 (see its header). */
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+
+/* Iterative 3-way string quicksort (Bentley/Sedgewick) on
+   ptr[loSt..hiSt], comparing rotations at byte depth d.  Uses an
+   explicit (lo,hi,depth) stack; ranges that are small or already
+   deep go to mainSimpleSort.  The pivot is mmed3 of the bytes at
+   depth d of the first, last and middle elements.  Keys equal to
+   the pivot form a middle subrange that is re-pushed at depth d+1.
+   Returns early once *budget goes negative (detected after
+   mainSimpleSort calls). */
+static
+void mainQSort3 ( UInt32* ptr,
+                  UChar*  block,
+                  UInt16* quadrant,
+                  Int32   nblock,
+                  Int32   loSt, 
+                  Int32   hiSt, 
+                  Int32   dSt,
+                  Int32*  budget )
+{
+   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+   Int32 sp, lo, hi, d;
+
+   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+   Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+   /* Descriptors of the three subranges produced by one partition. */
+   Int32 nextLo[3];
+   Int32 nextHi[3];
+   Int32 nextD [3];
+
+   sp = 0;
+   mpush ( loSt, hiSt, dSt );
+
+   while (sp > 0) {
+
+      AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
+
+      mpop ( lo, hi, d );
+      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
+          d > MAIN_QSORT_DEPTH_THRESH) {
+         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+         if (*budget < 0) return;
+         continue;
+      }
+
+      med = (Int32) 
+            mmed3 ( block[ptr[ lo         ]+d],
+                    block[ptr[ hi         ]+d],
+                    block[ptr[ (lo+hi)>>1 ]+d] );
+
+      /* [lo..ltLo) and (gtHi..hi] collect keys == med;
+         [unLo..unHi] is the still-unexamined middle. */
+      unLo = ltLo = lo;
+      unHi = gtHi = hi;
+
+      while (True) {
+         while (True) {
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unLo]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unLo], ptr[ltLo]); 
+               ltLo++; unLo++; continue; 
+            };
+            if (n >  0) break;
+            unLo++;
+         }
+         while (True) {
+            if (unLo > unHi) break;
+            n = ((Int32)block[ptr[unHi]+d]) - med;
+            if (n == 0) { 
+               mswap(ptr[unHi], ptr[gtHi]); 
+               gtHi--; unHi--; continue; 
+            };
+            if (n <  0) break;
+            unHi--;
+         }
+         if (unLo > unHi) break;
+         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+      }
+
+      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+      /* Whole range equal to the pivot: recurse one byte deeper. */
+      if (gtHi < ltLo) {
+         mpush(lo, hi, d+1 );
+         continue;
+      }
+
+      /* Move the equal-to-pivot blocks from the ends to the middle. */
+      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+
+      n = lo + unLo - ltLo - 1;
+      m = hi - (gtHi - unHi) + 1;
+
+      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
+      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
+      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+
+      /* Sort the three subranges by decreasing size, then push in
+         that order so the smallest is popped (processed) first. */
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+      mpush (nextLo[0], nextHi[0], nextD[0]);
+      mpush (nextLo[1], nextHi[1], nextD[1]);
+      mpush (nextLo[2], nextHi[2], nextD[2]);
+   }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > N_OVERSHOOT
+      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+      ((UChar*)block32) [0 .. nblock-1] holds block
+      ptr exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)block32) [0 .. nblock-1] holds block
+      All other areas of block32 destroyed
+      ftab [0 .. 65536 ] destroyed
+      ptr [0 .. nblock-1] holds sorted order
+      if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
+#define SETMASK (1 << 21)
+#define CLEARMASK (~(SETMASK))
+
/* The main block sort.  Builds a 2-byte frequency table (ftab) and
   radix-sorts all indices by their leading two bytes, then completes
   each of the 256 "big buckets" -- smallest first -- by quicksorting
   its unsorted small buckets (mainQSort3) and synthesising the order
   of related small buckets by scanning.  If *budget goes negative the
   sort is abandoned part-way and the caller must use fallbackSort. */
static
void mainSort ( UInt32* ptr, 
                UChar*  block,
                UInt16* quadrant, 
                UInt32* ftab,
                Int32   nblock,
                Int32   verb,
                Int32*  budget )
{
   Int32  i, j, k, ss, sb;
   Int32  runningOrder[256];
   Bool   bigDone[256];
   Int32  copyStart[256];
   Int32  copyEnd  [256];
   UChar  c1;
   Int32  numQSorted;
   UInt16 s;
   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );

   /*-- set up the 2-byte frequency table --*/
   for (i = 65536; i >= 0; i--) ftab[i] = 0;

   /* j is a rolling 2-byte window: after the update for index i it
      holds (block[i] << 8) | block[i+1], wrapping at the block end.
      The counting loop is unrolled x4 and also zeroes quadrant[]. */
   j = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
      quadrant[i-1] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
      ftab[j]++;
      quadrant[i-2] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
      ftab[j]++;
      quadrant[i-3] = 0;
      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
      ftab[j]++;
   }
   for (; i >= 0; i--) {
      quadrant[i] = 0;
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
      ftab[j]++;
   }

   /*-- (emphasises close relationship of block & quadrant) --*/
   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
      block   [nblock+i] = block[i];
      quadrant[nblock+i] = 0;
   }

   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );

   /*-- Complete the initial radix sort --*/
   /* Turn the counts into cumulative counts, then drop each index
      into its 2-byte bucket, scanning right to left and filling each
      bucket downwards from its top. */
   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];

   s = block[0] << 8;
   i = nblock-1;
   for (; i >= 3; i -= 4) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
      s = (s >> 8) | (block[i-1] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-1;
      s = (s >> 8) | (block[i-2] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-2;
      s = (s >> 8) | (block[i-3] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i-3;
   }
   for (; i >= 0; i--) {
      s = (s >> 8) | (block[i] << 8);
      j = ftab[s] -1;
      ftab[s] = j;
      ptr[j] = i;
   }

   /*--
      Now ftab contains the first loc of every small bucket.
      Calculate the running order, from smallest to largest
      big bucket.
   --*/
   for (i = 0; i <= 255; i++) {
      bigDone     [i] = False;
      runningOrder[i] = i;
   }

   /* Shell sort runningOrder[] by ascending big-bucket size
      (BIGFREQ), using the 1, 4, 13, ... increment sequence. */
   {
      Int32 vv;
      Int32 h = 1;
      do h = 3 * h + 1; while (h <= 256);
      do {
         h = h / 3;
         for (i = h; i <= 255; i++) {
            vv = runningOrder[i];
            j = i;
            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
               runningOrder[j] = runningOrder[j-h];
               j = j - h;
               if (j <= (h - 1)) goto zero;
            }
            zero:
            runningOrder[j] = vv;
         }
      } while (h != 1);
   }

   /*--
      The main sorting loop.
   --*/

   numQSorted = 0;

   for (i = 0; i <= 255; i++) {

      /*--
         Process big buckets, starting with the least full.
         Basically this is a 3-step process in which we call
         mainQSort3 to sort the small buckets [ss, j], but
         also make a big effort to avoid the calls if we can.
      --*/
      ss = runningOrder[i];

      /*--
         Step 1:
         Complete the big bucket [ss] by quicksorting
         any unsorted small buckets [ss, j], for j != ss.  
         Hopefully previous pointer-scanning phases have already
         completed many of the small buckets [ss, j], so
         we don't have to sort them at all.
      --*/
      for (j = 0; j <= 255; j++) {
         if (j != ss) {
            sb = (ss << 8) + j;
            /* The SETMASK bit in ftab[sb] marks a small bucket that
               is already sorted (here, or synthesised by Step 2 of
               an earlier iteration). */
            if ( ! (ftab[sb] & SETMASK) ) {
               Int32 lo = ftab[sb]   & CLEARMASK;
               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
               if (hi > lo) {
                  if (verb >= 4)
                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
                                "done %d   this %d\n",
                                ss, j, numQSorted, hi - lo + 1 );
                  mainQSort3 ( 
                     ptr, block, quadrant, nblock, 
                     lo, hi, BZ_N_RADIX, budget 
                  );   
                  numQSorted += (hi - lo + 1);
                  if (*budget < 0) return;
               }
            }
            ftab[sb] |= SETMASK;
         }
      }

      AssertH ( !bigDone[ss], 1006 );

      /*--
         Step 2:
         Now scan this big bucket [ss] so as to synthesise the
         sorted order for small buckets [t, ss] for all t,
         including, magically, the bucket [ss,ss] too.
         This will avoid doing Real Work in subsequent Step 1's.
      --*/
      {
         for (j = 0; j <= 255; j++) {
            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
         }
         /* Forward scan fills each [t,ss] bucket from its start... */
         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1])
               ptr[ copyStart[c1]++ ] = k;
         }
         /* ...backward scan fills it from its end. */
         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
            k = ptr[j]-1; if (k < 0) k += nblock;
            c1 = block[k];
            if (!bigDone[c1]) 
               ptr[ copyEnd[c1]-- ] = k;
         }
      }

      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
                || 
                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
                   Necessity for this case is demonstrated by compressing 
                   a sequence of approximately 48.5 million of character 
                   251; 1.0.0/1.0.1 will then die here. */
                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
                1007 )

      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;

      /*--
         Step 3:
         The [ss] big bucket is now done.  Record this fact,
         and update the quadrant descriptors.  Remember to
         update quadrants in the overshoot area too, if
         necessary.  The "if (i < 255)" test merely skips
         this updating for the last bucket processed, since
         updating for the last bucket is pointless.

         The quadrant array provides a way to incrementally
         cache sort orderings, as they appear, so as to 
         make subsequent comparisons in fullGtU() complete
         faster.  For repetitive blocks this makes a big
         difference (but not big enough to be able to avoid
         the fallback sorting mechanism, exponential radix sort).

         The precise meaning is: at all times:

            for 0 <= i < nblock and 0 <= j <= nblock

            if block[i] != block[j], 

               then the relative values of quadrant[i] and 
                    quadrant[j] are meaningless.

               else {
                  if quadrant[i] < quadrant[j]
                     then the string starting at i lexicographically
                     precedes the string starting at j

                  else if quadrant[i] > quadrant[j]
                     then the string starting at j lexicographically
                     precedes the string starting at i

                  else
                     the relative ordering of the strings starting
                     at i and j has not yet been determined.
               }
      --*/
      bigDone[ss] = True;

      if (i < 255) {
         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
         Int32 shifts   = 0;

         /* Scale the rank so it fits a UInt16 quadrant entry. */
         while ((bbSize >> shifts) > 65534) shifts++;

         for (j = bbSize-1; j >= 0; j--) {
            Int32 a2update     = ptr[bbStart + j];
            UInt16 qVal        = (UInt16)(j >> shifts);
            quadrant[a2update] = qVal;
            if (a2update < BZ_N_OVERSHOOT)
               quadrant[a2update + nblock] = qVal;
         }
         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
      }

   }

   if (verb >= 4)
      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
                 nblock, numQSorted, nblock - numQSorted );
}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* Pre:
+      nblock > 0
+      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
+      ((UChar*)arr2)  [0 .. nblock-1] holds block
+      arr1 exists for [0 .. nblock-1]
+
+   Post:
+      ((UChar*)arr2) [0 .. nblock-1] holds block
+      All other areas of block destroyed
+      ftab [ 0 .. 65536 ] destroyed
+      arr1 [0 .. nblock-1] holds sorted order
+*/
+void BZ2_blockSort ( EState* s )
+{
+   UInt32* ptr    = s->ptr; 
+   UChar*  block  = s->block;
+   UInt32* ftab   = s->ftab;
+   Int32   nblock = s->nblock;
+   Int32   verb   = s->verbosity;
+   Int32   wfact  = s->workFactor;
+   UInt16* quadrant;
+   Int32   budget;
+   Int32   budgetInit;
+   Int32   i;
+
+   if (nblock < /* 10000 */1000 ) {
+      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+   } else {
+      /* Calculate the location for quadrant, remembering to get
+         the alignment right.  Assumes that &(block[0]) is at least
+         2-byte aligned -- this should be ok since block is really
+         the first section of arr2.
+      */
+      i = nblock+BZ_N_OVERSHOOT;
+      if (i & 1) i++;
+      quadrant = (UInt16*)(&(block[i]));
+
+      /* (wfact-1) / 3 puts the default-factor-30
+         transition point at very roughly the same place as 
+         with v0.1 and v0.9.0.  
+         Not that it particularly matters any more, since the
+         resulting compressed stream is now the same regardless
+         of whether or not we use the main sort or fallback sort.
+      */
+      if (wfact < 1  ) wfact = 1;
+      if (wfact > 100) wfact = 100;
+      budgetInit = nblock * ((wfact-1) / 3);
+      budget = budgetInit;
+
+      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+      if (0 && verb >= 3) 
+         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
+                    budgetInit - budget,
+                    nblock, 
+                    (float)(budgetInit - budget) /
+                    (float)(nblock==0 ? 1 : nblock) ); 
+      if (budget < 0) {
+         if (verb >= 2) 
+            VPrintf0 ( "    too repetitive; using fallback"
+                       " sorting algorithm\n" );
+         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+      }
+   }
+
+   s->origPtr = -1;
+   for (i = 0; i < s->nblock; i++)
+      if (ptr[i] == 0)
+         { s->origPtr = i; break; };
+
+   AssertH( s->origPtr != -1, 1003 );
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                       blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/*-------------------------------------------------------------*/
+/*--- Huffman coding low-level stuff                        ---*/
+/*---                                             huffman.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+
+/*---------------------------------------------------*/
/* A node's weight packs its (scaled) frequency into the upper 24
   bits and its subtree depth into the low 8 bits. */
#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))

/* Combine two packed weights: the frequencies add, the depth becomes
   1 + max(child depths). */
#define ADDWEIGHTS(zw1,zw2)                           \
   (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
   (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))

/* Sift heap[z] upwards in a 1-based binary min-heap keyed on
   weight[].  Expands in a scope that provides the locals 'heap' and
   'weight'; entry 0 is a sentinel with weight 0. */
#define UPHEAP(z)                                     \
{                                                     \
   Int32 zz, tmp;                                     \
   zz = z; tmp = heap[zz];                            \
   while (weight[tmp] < weight[heap[zz >> 1]]) {      \
      heap[zz] = heap[zz >> 1];                       \
      zz >>= 1;                                       \
   }                                                  \
   heap[zz] = tmp;                                    \
}

/* Sift heap[z] downwards in the same min-heap.  Additionally relies
   on the local 'nHeap' (current heap size) at the expansion site. */
#define DOWNHEAP(z)                                   \
{                                                     \
   Int32 zz, yy, tmp;                                 \
   zz = z; tmp = heap[zz];                            \
   while (True) {                                     \
      yy = zz << 1;                                   \
      if (yy > nHeap) break;                          \
      if (yy < nHeap &&                               \
          weight[heap[yy+1]] < weight[heap[yy]])      \
         yy++;                                        \
      if (weight[tmp] < weight[heap[yy]]) break;      \
      heap[zz] = heap[yy];                            \
      zz = yy;                                        \
   }                                                  \
   heap[zz] = tmp;                                    \
}
+
+
+/*---------------------------------------------------*/
/* Compute Huffman code lengths for alphaSize symbols from freq[],
   writing them to len[0..alphaSize-1].  The tree is built with a
   binary min-heap over packed weights (frequency<<8 | depth -- see
   WEIGHTOF/DEPTHOF/ADDWEIGHTS).  If any resulting code would exceed
   maxLen bits, all frequencies are roughly halved and the whole tree
   is rebuilt, repeating until the limit is met. */
void BZ2_hbMakeCodeLengths ( UChar *len, 
                             Int32 *freq,
                             Int32 alphaSize,
                             Int32 maxLen )
{
   /*--
      Nodes and heap entries run from 1.  Entry 0
      for both the heap and nodes is a sentinel.
   --*/
   Int32 nNodes, nHeap, n1, n2, i, j, k;
   Bool  tooLong;

   Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
   Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
   Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 

   /* Zero frequencies count as 1 so every symbol gets a code. */
   for (i = 0; i < alphaSize; i++)
      weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;

   while (True) {

      nNodes = alphaSize;
      nHeap = 0;

      heap[0] = 0;
      weight[0] = 0;
      parent[0] = -2;

      /* Build the min-heap containing all alphaSize leaves. */
      for (i = 1; i <= alphaSize; i++) {
         parent[i] = -1;
         nHeap++;
         heap[nHeap] = i;
         UPHEAP(nHeap);
      }

      AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
   
      /* Repeatedly join the two lightest subtrees into a new node. */
      while (nHeap > 1) {
         n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
         n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
         nNodes++;
         parent[n1] = parent[n2] = nNodes;
         weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
         parent[nNodes] = -1;
         nHeap++;
         heap[nHeap] = nNodes;
         UPHEAP(nHeap);
      }

      AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );

      /* Code length of symbol i = depth of leaf i (walk the parent
         chain up to the root). */
      tooLong = False;
      for (i = 1; i <= alphaSize; i++) {
         j = 0;
         k = i;
         while (parent[k] >= 0) { k = parent[k]; j++; }
         len[i-1] = j;
         if (j > maxLen) tooLong = True;
      }
      
      if (! tooLong) break;

      /* 17 Oct 04: keep-going condition for the following loop used
         to be 'i < alphaSize', which missed the last element,
         theoretically leading to the possibility of the compressor
         looping.  However, this count-scaling step is only needed if
         one of the generated Huffman code words is longer than
         maxLen, which up to and including version 1.0.2 was 20 bits,
         which is extremely unlikely.  In version 1.0.3 maxLen was
         changed to 17 bits, which has minimal effect on compression
         ratio, but does mean this scaling step is used from time to
         time, enough to verify that it works.

         This means that bzip2-1.0.3 and later will only produce
         Huffman codes with a maximum length of 17 bits.  However, in
         order to preserve backwards compatibility with bitstreams
         produced by versions pre-1.0.3, the decompressor must still
         handle lengths of up to 20. */

      /* Roughly halve all frequencies and try again. */
      for (i = 1; i <= alphaSize; i++) {
         j = weight[i] >> 8;
         j = 1 + (j / 2);
         weight[i] = j << 8;
      }
   }
}
+
+
+/*---------------------------------------------------*/
+void BZ2_hbAssignCodes ( Int32 *code,
+                         UChar *length,
+                         Int32 minLen,
+                         Int32 maxLen,
+                         Int32 alphaSize )
+{
+   Int32 n, vec, i;
+
+   vec = 0;
+   for (n = minLen; n <= maxLen; n++) {
+      for (i = 0; i < alphaSize; i++)
+         if (length[i] == n) { code[i] = vec; vec++; };
+      vec <<= 1;
+   }
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_hbCreateDecodeTables ( Int32 *limit,
+                                Int32 *base,
+                                Int32 *perm,
+                                UChar *length,
+                                Int32 minLen,
+                                Int32 maxLen,
+                                Int32 alphaSize )
+{
+   Int32 pp, i, j, vec;
+
+   pp = 0;
+   for (i = minLen; i <= maxLen; i++)
+      for (j = 0; j < alphaSize; j++)
+         if (length[j] == i) { perm[pp] = j; pp++; };
+
+   for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
+   for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
+
+   for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
+
+   for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
+   vec = 0;
+
+   for (i = minLen; i <= maxLen; i++) {
+      vec += (base[i+1] - base[i]);
+      limit[i] = vec-1;
+      vec <<= 1;
+   }
+   for (i = minLen + 1; i <= maxLen; i++)
+      base[i] = ((limit[i-1] + 1) << 1) - base[i];
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                         huffman.c ---*/
+/*-------------------------------------------------------------*/
+
+/*-------------------------------------------------------------*/
+/*--- Compression machinery (not incl block sorting)        ---*/
+/*---                                            compress.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+/*--
+   CHANGES
+   ~~~~~~~
+   0.9.0 -- original version.
+
+   0.9.0a/b -- no changes in this file.
+
+   0.9.0c
+      * changed setting of nGroups in sendMTFValues() so as to 
+        do a bit better on small files
+--*/
+
+
+
+/*---------------------------------------------------*/
+/*--- Bit stream I/O                              ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+void BZ2_bsInitWrite ( EState* s )
+{
+   s->bsLive = 0;
+   s->bsBuff = 0;
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsFinishWrite ( EState* s )
+{
+   while (s->bsLive > 0) {
+      s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
+      s->numZ++;
+      s->bsBuff <<= 8;
+      s->bsLive -= 8;
+   }
+}
+
+
+/*---------------------------------------------------*/
/* Flush complete bytes from the top of the bit buffer into s->zbits,
   leaving fewer than 8 live bits.  Expands in a scope providing the
   local 's' (an EState*); the argument nz is unused. */
#define bsNEEDW(nz)                           \
{                                             \
   while (s->bsLive >= 8) {                   \
      s->zbits[s->numZ]                       \
         = (UChar)(s->bsBuff >> 24);          \
      s->numZ++;                              \
      s->bsBuff <<= 8;                        \
      s->bsLive -= 8;                         \
   }                                          \
}
+
+
+/*---------------------------------------------------*/
static
__inline__
void bsW ( EState* s, Int32 n, UInt32 v )
{
   /* Append the low n bits of v to the output bit stream.  bsNEEDW
      first flushes whole bytes so the new bits fit; they are then
      placed just below the bsLive bits already in the top of the
      32-bit buffer. */
   bsNEEDW ( n );
   s->bsBuff |= (v << (32 - s->bsLive - n));
   s->bsLive += n;
}
+
+
+/*---------------------------------------------------*/
+static
+void bsPutUInt32 ( EState* s, UInt32 u )
+{
+   bsW ( s, 8, (u >> 24) & 0xffL );
+   bsW ( s, 8, (u >> 16) & 0xffL );
+   bsW ( s, 8, (u >>  8) & 0xffL );
+   bsW ( s, 8,  u        & 0xffL );
+}
+
+
+/*---------------------------------------------------*/
static
void bsPutUChar ( EState* s, UChar c )
{
   /* Emit a single byte (8 bits) to the bit stream. */
   bsW( s, 8, (UInt32)c );
}
+
+
+/*---------------------------------------------------*/
+/*--- The back end proper                         ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+static
+void makeMaps_e ( EState* s )
+{
+   Int32 i;
+   s->nInUse = 0;
+   for (i = 0; i < 256; i++)
+      if (s->inUse[i]) {
+         s->unseqToSeq[i] = s->nInUse;
+         s->nInUse++;
+      }
+}
+
+
+/*---------------------------------------------------*/
/* Apply the move-to-front transform plus zero run-length (RLE2)
   coding to the sorted block.  Writes the resulting symbol stream,
   terminated by EOB, into s->mtfv; accumulates per-symbol counts in
   s->mtfFreq; and sets s->nMTF to the number of symbols emitted. */
static
void generateMTFValues ( EState* s )
{
   UChar   yy[256];
   Int32   i, j;
   Int32   zPend;
   Int32   wr;
   Int32   EOB;

   /* 
      After sorting (eg, here),
         s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
         and
         ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 
         holds the original block data.

      The first thing to do is generate the MTF values,
      and put them in
         ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
      Because there are strictly fewer or equal MTF values
      than block values, ptr values in this area are overwritten
      with MTF values only when they are no longer needed.

      The final compressed bitstream is generated into the
      area starting at
         (UChar*) (&((UChar*)s->arr2)[s->nblock])

      These storage aliases are set up in bzCompressInit(),
      except for the last one, which is arranged in 
      compressBlock().
   */
   UInt32* ptr   = s->ptr;
   UChar* block  = s->block;
   UInt16* mtfv  = s->mtfv;

   makeMaps_e ( s );
   EOB = s->nInUse+1;

   for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;

   wr = 0;
   /* zPend counts a pending run of zero MTF values (i.e. repeats of
      the front symbol); it is emitted in RUNA/RUNB form below. */
   zPend = 0;
   /* yy[] is the move-to-front list, initially the identity. */
   for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;

   for (i = 0; i < s->nblock; i++) {
      UChar ll_i;
      AssertD ( wr <= i, "generateMTFValues(1)" );
      /* The byte preceding rotation ptr[i] (wrapping) is the next
         BWT output byte; map it to its sequential symbol. */
      j = ptr[i]-1; if (j < 0) j += s->nblock;
      ll_i = s->unseqToSeq[block[j]];
      AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );

      if (yy[0] == ll_i) { 
         /* Symbol already at the front: extend the zero run. */
         zPend++;
      } else {

         /* Flush the pending zero run, encoded least-significant
            bit first as RUNA/RUNB digits (bijective base 2). */
         if (zPend > 0) {
            zPend--;
            while (True) {
               if (zPend & 1) {
                  mtfv[wr] = BZ_RUNB; wr++; 
                  s->mtfFreq[BZ_RUNB]++; 
               } else {
                  mtfv[wr] = BZ_RUNA; wr++; 
                  s->mtfFreq[BZ_RUNA]++; 
               }
               if (zPend < 2) break;
               zPend = (zPend - 2) / 2;
            };
            zPend = 0;
         }
         /* Move ll_i to the front of yy[], sliding the intervening
            symbols up one place; the distance it moved (+1) is the
            MTF code emitted. */
         {
            register UChar  rtmp;
            register UChar* ryy_j;
            register UChar  rll_i;
            rtmp  = yy[1];
            yy[1] = yy[0];
            ryy_j = &(yy[1]);
            rll_i = ll_i;
            while ( rll_i != rtmp ) {
               register UChar rtmp2;
               ryy_j++;
               rtmp2  = rtmp;
               rtmp   = *ryy_j;
               *ryy_j = rtmp2;
            };
            yy[0] = rtmp;
            j = ryy_j - &(yy[0]);
            mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
         }

      }
   }

   /* Flush any zero run left over at the end of the block. */
   if (zPend > 0) {
      zPend--;
      while (True) {
         if (zPend & 1) {
            mtfv[wr] = BZ_RUNB; wr++; 
            s->mtfFreq[BZ_RUNB]++; 
         } else {
            mtfv[wr] = BZ_RUNA; wr++; 
            s->mtfFreq[BZ_RUNA]++; 
         }
         if (zPend < 2) break;
         zPend = (zPend - 2) / 2;
      };
      zPend = 0;
   }

   mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;

   s->nMTF = wr;
}
+
+
+/*---------------------------------------------------*/
/* Artificial per-symbol bit costs used when seeding the coding
   tables: symbols inside a group's initial range are "cheap",
   everything else "expensive". */
#define BZ_LESSER_ICOST  0
#define BZ_GREATER_ICOST 15
+
+static
+void sendMTFValues ( EState* s )
+{
+   Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
+   Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
+   Int32 nGroups, nBytes;
+
+   /*--
+   UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   is a global since the decoder also needs it.
+
+   Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+   are also globals only used in this proc.
+   Made global to keep stack frame size small.
+   --*/
+
+
+   UInt16 cost[BZ_N_GROUPS];
+   Int32  fave[BZ_N_GROUPS];
+
+   UInt16* mtfv = s->mtfv;
+
+   if (s->verbosity >= 3)
+      VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
+                "%d+2 syms in use\n", 
+                s->nblock, s->nMTF, s->nInUse );
+
+   alphaSize = s->nInUse+2;
+   for (t = 0; t < BZ_N_GROUPS; t++)
+      for (v = 0; v < alphaSize; v++)
+         s->len[t][v] = BZ_GREATER_ICOST;
+
+   /*--- Decide how many coding tables to use ---*/
+   AssertH ( s->nMTF > 0, 3001 );
+   if (s->nMTF < 200)  nGroups = 2; else
+   if (s->nMTF < 600)  nGroups = 3; else
+   if (s->nMTF < 1200) nGroups = 4; else
+   if (s->nMTF < 2400) nGroups = 5; else
+                       nGroups = 6;
+
+   /*--- Generate an initial set of coding tables ---*/
+   { 
+      Int32 nPart, remF, tFreq, aFreq;
+
+      nPart = nGroups;
+      remF  = s->nMTF;
+      gs = 0;
+      while (nPart > 0) {
+         tFreq = remF / nPart;
+         ge = gs-1;
+         aFreq = 0;
+         while (aFreq < tFreq && ge < alphaSize-1) {
+            ge++;
+            aFreq += s->mtfFreq[ge];
+         }
+
+         if (ge > gs 
+             && nPart != nGroups && nPart != 1 
+             && ((nGroups-nPart) % 2 == 1)) {
+            aFreq -= s->mtfFreq[ge];
+            ge--;
+         }
+
+         if (0 && s->verbosity >= 3)
+            VPrintf5( "      initial group %d, [%d .. %d], "
+                      "has %d syms (%4.1f%%)\n",
+                      nPart, gs, ge, aFreq, 
+                      (100.0 * (float)aFreq) / (float)(s->nMTF) );
+ 
+         for (v = 0; v < alphaSize; v++)
+            if (v >= gs && v <= ge) 
+               s->len[nPart-1][v] = BZ_LESSER_ICOST; else
+               s->len[nPart-1][v] = BZ_GREATER_ICOST;
+ 
+         nPart--;
+         gs = ge+1;
+         remF -= aFreq;
+      }
+   }
+
+   /*--- 
+      Iterate up to BZ_N_ITERS times to improve the tables.
+   ---*/
+   for (iter = 0; iter < BZ_N_ITERS; iter++) {
+
+      for (t = 0; t < nGroups; t++) fave[t] = 0;
+
+      for (t = 0; t < nGroups; t++)
+         for (v = 0; v < alphaSize; v++)
+            s->rfreq[t][v] = 0;
+
+      /*---
+        Set up an auxiliary length table which is used to fast-track
+	the common case (nGroups == 6). 
+      ---*/
+      if (nGroups == 6) {
+         for (v = 0; v < alphaSize; v++) {
+            s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+            s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+            s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+	 }
+      }
+
+      nSelectors = 0;
+      totc = 0;
+      gs = 0;
+      while (True) {
+
+         /*--- Set group start & end marks. --*/
+         if (gs >= s->nMTF) break;
+         ge = gs + BZ_G_SIZE - 1; 
+         if (ge >= s->nMTF) ge = s->nMTF-1;
+
+         /*-- 
+            Calculate the cost of this group as coded
+            by each of the coding tables.
+         --*/
+         for (t = 0; t < nGroups; t++) cost[t] = 0;
+
+         if (nGroups == 6 && 50 == ge-gs+1) {
+            /*--- fast track the common case ---*/
+            register UInt32 cost01, cost23, cost45;
+            register UInt16 icv;
+            cost01 = cost23 = cost45 = 0;
+
+#           define BZ_ITER(nn)                \
+               icv = mtfv[gs+(nn)];           \
+               cost01 += s->len_pack[icv][0]; \
+               cost23 += s->len_pack[icv][1]; \
+               cost45 += s->len_pack[icv][2]; \
+
+            BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
+            BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
+            BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+            BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+            BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+            BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+            BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+            BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+            BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+            BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+#           undef BZ_ITER
+
+            cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+            cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+            cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
+         } else {
+	    /*--- slow version which correctly handles all situations ---*/
+            for (i = gs; i <= ge; i++) { 
+               UInt16 icv = mtfv[i];
+               for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
+            }
+         }
+ 
+         /*-- 
+            Find the coding table which is best for this group,
+            and record its identity in the selector table.
+         --*/
+         bc = 999999999; bt = -1;
+         for (t = 0; t < nGroups; t++)
+            if (cost[t] < bc) { bc = cost[t]; bt = t; };
+         totc += bc;
+         fave[bt]++;
+         s->selector[nSelectors] = bt;
+         nSelectors++;
+
+         /*-- 
+            Increment the symbol frequencies for the selected table.
+          --*/
+         if (nGroups == 6 && 50 == ge-gs+1) {
+            /*--- fast track the common case ---*/
+
+#           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+            BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
+            BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
+            BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+            BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+            BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+            BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+            BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+            BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+            BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+            BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+#           undef BZ_ITUR
+
+         } else {
+	    /*--- slow version which correctly handles all situations ---*/
+            for (i = gs; i <= ge; i++)
+               s->rfreq[bt][ mtfv[i] ]++;
+         }
+
+         gs = ge+1;
+      }
+      if (s->verbosity >= 3) {
+         VPrintf2 ( "      pass %d: size is %d, grp uses are ", 
+                   iter+1, totc/8 );
+         for (t = 0; t < nGroups; t++)
+            VPrintf1 ( "%d ", fave[t] );
+         VPrintf0 ( "\n" );
+      }
+
+      /*--
+        Recompute the tables based on the accumulated frequencies.
+      --*/
+      /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See 
+         comment in huffman.c for details. */
+      for (t = 0; t < nGroups; t++)
+         BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 
+                                 alphaSize, 17 /*20*/ );
+   }
+
+
+   AssertH( nGroups < 8, 3002 );
+   AssertH( nSelectors < 32768 &&
+            nSelectors <= (2 + (900000 / BZ_G_SIZE)),
+            3003 );
+
+
+   /*--- Compute MTF values for the selectors. ---*/
+   {
+      UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+      for (i = 0; i < nGroups; i++) pos[i] = i;
+      for (i = 0; i < nSelectors; i++) {
+         ll_i = s->selector[i];
+         j = 0;
+         tmp = pos[j];
+         while ( ll_i != tmp ) {
+            j++;
+            tmp2 = tmp;
+            tmp = pos[j];
+            pos[j] = tmp2;
+         };
+         pos[0] = tmp;
+         s->selectorMtf[i] = j;
+      }
+   };
+
+   /*--- Assign actual codes for the tables. --*/
+   for (t = 0; t < nGroups; t++) {
+      minLen = 32;
+      maxLen = 0;
+      for (i = 0; i < alphaSize; i++) {
+         if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+         if (s->len[t][i] < minLen) minLen = s->len[t][i];
+      }
+      AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
+      AssertH ( !(minLen < 1),  3005 );
+      BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 
+                          minLen, maxLen, alphaSize );
+   }
+
+   /*--- Transmit the mapping table. ---*/
+   { 
+      Bool inUse16[16];
+      for (i = 0; i < 16; i++) {
+          inUse16[i] = False;
+          for (j = 0; j < 16; j++)
+             if (s->inUse[i * 16 + j]) inUse16[i] = True;
+      }
+     
+      nBytes = s->numZ;
+      for (i = 0; i < 16; i++)
+         if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
+
+      for (i = 0; i < 16; i++)
+         if (inUse16[i])
+            for (j = 0; j < 16; j++) {
+               if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
+            }
+
+      if (s->verbosity >= 3) 
+         VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
+   }
+
+   /*--- Now the selectors. ---*/
+   nBytes = s->numZ;
+   bsW ( s, 3, nGroups );
+   bsW ( s, 15, nSelectors );
+   for (i = 0; i < nSelectors; i++) { 
+      for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
+      bsW(s,1,0);
+   }
+   if (s->verbosity >= 3)
+      VPrintf1( "selectors %d, ", s->numZ-nBytes );
+
+   /*--- Now the coding tables. ---*/
+   nBytes = s->numZ;
+
+   for (t = 0; t < nGroups; t++) {
+      Int32 curr = s->len[t][0];
+      bsW ( s, 5, curr );
+      for (i = 0; i < alphaSize; i++) {
+         while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
+         while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
+         bsW ( s, 1, 0 );
+      }
+   }
+
+   if (s->verbosity >= 3)
+      VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
+
+   /*--- And finally, the block data proper ---*/
+   nBytes = s->numZ;
+   selCtr = 0;
+   gs = 0;
+   while (True) {
+      if (gs >= s->nMTF) break;
+      ge = gs + BZ_G_SIZE - 1; 
+      if (ge >= s->nMTF) ge = s->nMTF-1;
+      AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+      if (nGroups == 6 && 50 == ge-gs+1) {
+            /*--- fast track the common case ---*/
+            UInt16 mtfv_i;
+            UChar* s_len_sel_selCtr 
+               = &(s->len[s->selector[selCtr]][0]);
+            Int32* s_code_sel_selCtr
+               = &(s->code[s->selector[selCtr]][0]);
+
+#           define BZ_ITAH(nn)                      \
+               mtfv_i = mtfv[gs+(nn)];              \
+               bsW ( s,                             \
+                     s_len_sel_selCtr[mtfv_i],      \
+                     s_code_sel_selCtr[mtfv_i] )
+
+            BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
+            BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
+            BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+            BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+            BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+            BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+            BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+            BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+            BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+            BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+#           undef BZ_ITAH
+
+      } else {
+	 /*--- slow version which correctly handles all situations ---*/
+         for (i = gs; i <= ge; i++) {
+            bsW ( s, 
+                  s->len  [s->selector[selCtr]] [mtfv[i]],
+                  s->code [s->selector[selCtr]] [mtfv[i]] );
+         }
+      }
+
+
+      gs = ge+1;
+      selCtr++;
+   }
+   AssertH( selCtr == nSelectors, 3007 );
+
+   if (s->verbosity >= 3)
+      VPrintf1( "codes %d\n", s->numZ-nBytes );
+}
+
+
+/*---------------------------------------------------*/
/* Compress and emit the current block (s->block, s->nblock bytes).
   Also writes the 4-byte stream header before the first block and
   the stream trailer after the last one.  The emission order below
   defines the bzip2 stream format — do not reorder. */
void BZ2_compressBlock ( EState* s, Bool is_last_block )
{
   if (s->nblock > 0) {

      BZ_FINALISE_CRC ( s->blockCRC );
      /* Fold this block's CRC into the whole-stream CRC:
         rotate combinedCRC left by one bit, then XOR in blockCRC. */
      s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
      s->combinedCRC ^= s->blockCRC;
      /* Restart the output-byte counter for the 2nd and later
         blocks (block 1's counter already covers the header). */
      if (s->blockNo > 1) s->numZ = 0;

      if (s->verbosity >= 2)
         VPrintf4( "    block %d: crc = 0x%08x, "
                   "combined CRC = 0x%08x, size = %d\n",
                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );

      /* Burrows-Wheeler sort of the block. */
      BZ2_blockSort ( s );
   }

   /* Compressed bits are written into arr2, just past the block data. */
   s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);

   /*-- If this is the first block, create the stream header. --*/
   if (s->blockNo == 1) {
      BZ2_bsInitWrite ( s );
      /* "BZh" followed by the block-size digit '1'..'9'. */
      bsPutUChar ( s, BZ_HDR_B );
      bsPutUChar ( s, BZ_HDR_Z );
      bsPutUChar ( s, BZ_HDR_h );
      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
   }

   if (s->nblock > 0) {

      /* Six block-magic bytes: 0x314159265359. */
      bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
      bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
      bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );

      /*-- Now the block's CRC, so it is in a known place. --*/
      bsPutUInt32 ( s, s->blockCRC );

      /*-- 
         Now a single bit indicating (non-)randomisation. 
         As of version 0.9.5, we use a better sorting algorithm
         which makes randomisation unnecessary.  So always set
         the randomised bit to 'no'.  Of course, the decoder
         still needs to be able to handle randomised blocks
         so as to maintain backwards compatibility with
         older versions of bzip2.
      --*/
      bsW(s,1,0);

      /* Origin pointer from the block sort, then the MTF/Huffman
         stages which emit the block payload. */
      bsW ( s, 24, s->origPtr );
      generateMTFValues ( s );
      sendMTFValues ( s );
   }


   /*-- If this is the last block, add the stream trailer. --*/
   if (is_last_block) {

      /* Six end-of-stream magic bytes (0x177245385090), then the
         combined CRC, then flush the bit buffer. */
      bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
      bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
      bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
      bsPutUInt32 ( s, s->combinedCRC );
      if (s->verbosity >= 2)
         VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
      bsFinishWrite ( s );
   }
}
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                        compress.c ---*/
+/*-------------------------------------------------------------*/
+
+
+/*-------------------------------------------------------------*/
+/*--- Table for randomising repetitive blocks               ---*/
+/*---                                           randtable.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+
+
+/*---------------------------------------------*/
/* 512 fixed pseudo-random numbers, each in 0..999, consulted when
   (de)randomising repetitive blocks (see the randtable.c banner
   above).  These values are part of the bzip2 stream format and
   must never be changed. */
Int32 BZ2_rNums[512] = { 
   619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 
   985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 
   733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 
   419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 
   878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 
   862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 
   150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 
   170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 
   73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 
   909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 
   641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 
   161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 
   382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 
   98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 
   227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 
   469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 
   184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 
   715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 
   951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 
   652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 
   645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 
   609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 
   653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 
   411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 
   170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 
   857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 
   669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 
   944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 
   344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 
   897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 
   433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 
   686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 
   946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 
   978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 
   680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 
   707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 
   297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 
   134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 
   343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 
   140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 
   170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 
   369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 
   804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 
   896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 
   661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 
   768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 
   61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 
   372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 
   780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 
   920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 
   645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 
   936, 638
};
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                       randtable.c ---*/
+/*-------------------------------------------------------------*/
+
+/*-------------------------------------------------------------*/
+/*--- Table for doing CRCs                                  ---*/
+/*---                                            crctable.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+
+
+
+
+/*--
+  I think this is an implementation of the AUTODIN-II,
+  Ethernet & FDDI 32-bit CRC standard.  Vaguely derived
+  from code by Rob Warnock, in Section 51 of the
+  comp.compression FAQ.
+--*/
+
/* 256-entry lookup table for the bzip2 CRC-32: an MSB-first CRC
   whose generator polynomial is 0x04c11db7 (AUTODIN-II /
   Ethernet / FDDI — see the note above; table[1] is the
   polynomial itself).  Consumed via the BZ_UPDATE_CRC macro. */
UInt32 BZ2_crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                        crctable.c ---*/
+/*-------------------------------------------------------------*/
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions.                          ---*/
+/*---                                               bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+  This file is a part of bzip2 and/or libbzip2, a program and
+  library for lossless, block-sorting data compression.
+
+  Copyright (C) 1996-2004 Julian R Seward.  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+  2. The origin of this software must not be misrepresented; you must 
+     not claim that you wrote the original software.  If you use this 
+     software in a product, an acknowledgment in the product 
+     documentation would be appreciated but is not required.
+
+  3. Altered source versions must be plainly marked as such, and must
+     not be misrepresented as being the original software.
+
+  4. The name of the author may not be used to endorse or promote 
+     products derived from this software without specific prior written 
+     permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  Julian Seward, Cambridge, UK.
+  jseward@bzip.org
+  bzip2/libbzip2 version 1.0 of 21 March 2000
+
+  This program is based on (at least) the work of:
+     Mike Burrows
+     David Wheeler
+     Peter Fenwick
+     Alistair Moffat
+     Radford Neal
+     Ian H. Witten
+     Robert Sedgewick
+     Jon L. Bentley
+
+  For more information on these sources, see the manual.
+--*/
+
+/*--
+   CHANGES
+   ~~~~~~~
+   0.9.0 -- original version.
+
+   0.9.0a/b -- no changes in this file.
+
+   0.9.0c
+      * made zero-length BZ_FLUSH work correctly in bzCompress().
+      * fixed bzWrite/bzRead to ignore zero-length requests.
+      * fixed bzread to correctly handle read requests after EOF.
+      * wrong parameter order in call to bzDecompressInit in
+        bzBuffToBuffDecompress.  Fixed.
+--*/
+
+
+
+/*---------------------------------------------------*/
+/*--- Compression stuff                           ---*/
+/*---------------------------------------------------*/
+
+
+/*---------------------------------------------------*/
/* Failure hook for the AssertH() macro: report the numeric error
   code, then hand control to the service function with request
   code 0 — the message implies this terminates; the exact
   serviceFn(0,0) semantics are defined elsewhere (confirm there). */
void BZ2_bz__AssertH__fail ( int errcode )
{
   vex_printf("BZ2_bz__AssertH__fail(%d) called, exiting\n", errcode);
   (*serviceFn)(0,0);
}
+
+void bz_internal_error ( int errcode )
+{
+   vex_printf("bz_internal_error called, exiting\n", errcode);
+   (*serviceFn)(0,0);
+}
+
+/*---------------------------------------------------*/
/* Sanity-check the integer widths the (de)compressor relies on:
   32-bit int, 16-bit short, 8-bit char.  Returns 1 if the
   platform is usable, 0 otherwise. */
static
int bz_config_ok ( void )
{
   return sizeof(int)   == 4
       && sizeof(short) == 2
       && sizeof(char)  == 1;
}
+
+
+/*---------------------------------------------------*/
+static
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+   void* v = (void*) (*serviceFn)(2, items * size );
+   return v;
+}
+
+static
+void default_bzfree ( void* opaque, void* addr )
+{
+   if (addr != NULL) (*serviceFn)( 3, (HWord)addr );
+}
+
+
+/*---------------------------------------------------*/
+static
+void prepare_new_block ( EState* s )
+{
+   Int32 i;
+   s->nblock = 0;
+   s->numZ = 0;
+   s->state_out_pos = 0;
+   BZ_INITIALISE_CRC ( s->blockCRC );
+   for (i = 0; i < 256; i++) s->inUse[i] = False;
+   s->blockNo++;
+}
+
+
+/*---------------------------------------------------*/
+static
+void init_RL ( EState* s )
+{
+   s->state_in_ch  = 256;
+   s->state_in_len = 0;
+}
+
+
+static
+Bool isempty_RL ( EState* s )
+{
+   if (s->state_in_ch < 256 && s->state_in_len > 0)
+      return False; else
+      return True;
+}
+
+
+/*---------------------------------------------------*/
/* Allocate and initialise compression state for strm.
   blockSize100k: block size in units of 100000 bytes (1..9).
   verbosity:     0 (quiet) upward.
   workFactor:    sort fall-back threshold, 0..250; 0 selects the
                  default of 30.
   Returns BZ_OK, or BZ_CONFIG_ERROR (bad platform int sizes),
   BZ_PARAM_ERROR (bad arguments), BZ_MEM_ERROR (allocation failed).
   NOTE: BZALLOC/BZFREE are macros that reference the local 'strm'
   by name — keep that variable name. */
int BZ_API(BZ2_bzCompressInit) 
                    ( bz_stream* strm, 
                     int        blockSize100k,
                     int        verbosity,
                     int        workFactor )
{
   Int32   n;
   EState* s;

   if (!bz_config_ok()) return BZ_CONFIG_ERROR;

   if (strm == NULL || 
       blockSize100k < 1 || blockSize100k > 9 ||
       workFactor < 0 || workFactor > 250)
     return BZ_PARAM_ERROR;

   /* 0 means "use the default work factor". */
   if (workFactor == 0) workFactor = 30;
   /* Install the serviceFn-backed allocator/deallocator if the
      caller supplied none. */
   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;

   s = BZALLOC( sizeof(EState) );
   if (s == NULL) return BZ_MEM_ERROR;
   s->strm = strm;

   /* Pre-null so the failure path below can free unconditionally. */
   s->arr1 = NULL;
   s->arr2 = NULL;
   s->ftab = NULL;

   n       = 100000 * blockSize100k;
   s->arr1 = BZALLOC( n                  * sizeof(UInt32) );
   s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) );
   s->ftab = BZALLOC( 65537              * sizeof(UInt32) );

   /* If any allocation failed, release whatever did succeed. */
   if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
      if (s->arr1 != NULL) BZFREE(s->arr1);
      if (s->arr2 != NULL) BZFREE(s->arr2);
      if (s->ftab != NULL) BZFREE(s->ftab);
      if (s       != NULL) BZFREE(s);
      return BZ_MEM_ERROR;
   }

   s->blockNo           = 0;
   s->state             = BZ_S_INPUT;
   s->mode              = BZ_M_RUNNING;
   s->combinedCRC       = 0;
   s->blockSize100k     = blockSize100k;
   /* NOTE(review): the -19 slack presumably reserves room for the
      longest single run-length emission — confirm against
      add_pair_to_block / ADD_CHAR_TO_BLOCK. */
   s->nblockMAX         = 100000 * blockSize100k - 19;
   s->verbosity         = verbosity;
   s->workFactor        = workFactor;

   /* block/mtfv/ptr alias the two big arrays at different types;
      zbits is set later, once the block length is known. */
   s->block             = (UChar*)s->arr2;
   s->mtfv              = (UInt16*)s->arr1;
   s->zbits             = NULL;
   s->ptr               = (UInt32*)s->arr1;

   strm->state          = s;
   strm->total_in_lo32  = 0;
   strm->total_in_hi32  = 0;
   strm->total_out_lo32 = 0;
   strm->total_out_hi32 = 0;
   init_RL ( s );
   prepare_new_block ( s );
   return BZ_OK;
}
+
+
+/*---------------------------------------------------*/
+/* Emit the pending RLE pair (character s->state_in_ch repeated
+   s->state_in_len times) into the block buffer.  Runs of 1..3
+   are stored literally; longer runs are stored as four copies of
+   the character followed by a byte holding (run length - 4).
+   Every byte of the run is folded into the block CRC, and byte
+   usage is recorded in s->inUse for the later coding stages. */
+static
+void add_pair_to_block ( EState* s )
+{
+   Int32 j;
+   UChar ch = (UChar)(s->state_in_ch);
+
+   /* the CRC covers the uncompressed run, not its encoded form */
+   for (j = 0; j < s->state_in_len; j++) {
+      BZ_UPDATE_CRC( s->blockCRC, ch );
+   }
+   s->inUse[s->state_in_ch] = True;
+
+   if (s->state_in_len >= 1 && s->state_in_len <= 3) {
+      /* short run: write it out literally */
+      for (j = 0; j < s->state_in_len; j++) {
+         s->block[s->nblock] = ch;
+         s->nblock++;
+      }
+   } else {
+      /* long run: four literal copies plus a count byte; the
+         count byte itself appears in the block so it too must be
+         marked as used */
+      s->inUse[s->state_in_len-4] = True;
+      for (j = 0; j < 4; j++) {
+         s->block[s->nblock] = ch;
+         s->nblock++;
+      }
+      s->block[s->nblock] = ((UChar)(s->state_in_len-4));
+      s->nblock++;
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Flush the RLE accumulator: if a character is pending (state_in_ch
+   below 256 -- values >= 256 presumably mark "nothing pending",
+   set by init_RL; confirm against its definition), emit the run,
+   then reset the accumulator. */
+static
+void flush_RL ( EState* s )
+{
+   if (s->state_in_ch < 256)
+      add_pair_to_block ( s );
+   init_RL ( s );
+}
+
+
+/*---------------------------------------------------*/
+/* Feed one input byte (zchh0) into the RLE accumulator of EState
+   zs.  The previously pending run is emitted when the incoming
+   byte differs from it, or when the run reaches 255 (the longest
+   encodable length).  The first arm inlines the state_in_len==1
+   emit so the dominant path avoids calling add_pair_to_block. */
+#define ADD_CHAR_TO_BLOCK(zs,zchh0)               \
+{                                                 \
+   UInt32 zchh = (UInt32)(zchh0);                 \
+   /*-- fast track the common case --*/           \
+   if (zchh != zs->state_in_ch &&                 \
+       zs->state_in_len == 1) {                   \
+      UChar ch = (UChar)(zs->state_in_ch);        \
+      BZ_UPDATE_CRC( zs->blockCRC, ch );          \
+      zs->inUse[zs->state_in_ch] = True;          \
+      zs->block[zs->nblock] = (UChar)ch;          \
+      zs->nblock++;                               \
+      zs->state_in_ch = zchh;                     \
+   }                                              \
+   else                                           \
+   /*-- general, uncommon cases --*/              \
+   if (zchh != zs->state_in_ch ||                 \
+      zs->state_in_len == 255) {                  \
+      if (zs->state_in_ch < 256)                  \
+         add_pair_to_block ( zs );                \
+      zs->state_in_ch = zchh;                     \
+      zs->state_in_len = 1;                       \
+   } else {                                       \
+      zs->state_in_len++;                         \
+   }                                              \
+}
+
+
+/*---------------------------------------------------*/
+/* Pull bytes from the caller's input buffer through the RLE
+   accumulator into the current block.  In RUNNING mode, stops
+   when the block fills or the input runs out.  In the other
+   (flushing/finishing) modes it additionally stops once
+   avail_in_expect bytes -- the amount outstanding when the
+   flush/finish was requested -- have been consumed.  Returns
+   True iff at least one input byte was consumed. */
+static
+Bool copy_input_until_stop ( EState* s )
+{
+   Bool progress_in = False;
+
+   if (s->mode == BZ_M_RUNNING) {
+
+      /*-- fast track the common case --*/
+      while (True) {
+         /*-- block full? --*/
+         if (s->nblock >= s->nblockMAX) break;
+         /*-- no input? --*/
+         if (s->strm->avail_in == 0) break;
+         progress_in = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 
+         s->strm->next_in++;
+         s->strm->avail_in--;
+         /* 64-bit input counter kept as two 32-bit halves */
+         s->strm->total_in_lo32++;
+         if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+      }
+
+   } else {
+
+      /*-- general, uncommon case --*/
+      while (True) {
+         /*-- block full? --*/
+         if (s->nblock >= s->nblockMAX) break;
+         /*-- no input? --*/
+         if (s->strm->avail_in == 0) break;
+         /*-- flush/finish end? --*/
+         if (s->avail_in_expect == 0) break;
+         progress_in = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 
+         s->strm->next_in++;
+         s->strm->avail_in--;
+         s->strm->total_in_lo32++;
+         if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++;
+         s->avail_in_expect--;
+      }
+   }
+   return progress_in;
+}
+
+
+/*---------------------------------------------------*/
+/* Drain compressed bytes from s->zbits into the caller's output
+   buffer, maintaining the stream's 64-bit output counter.
+   Returns True iff at least one byte was moved. */
+static
+Bool copy_output_until_stop ( EState* s )
+{
+   Bool moved_any = False;
+
+   /* stop when the caller's buffer fills or the block is drained */
+   while (s->strm->avail_out > 0 && s->state_out_pos < s->numZ) {
+      moved_any = True;
+      *(s->strm->next_out) = s->zbits[s->state_out_pos];
+      s->state_out_pos++;
+      s->strm->next_out++;
+      s->strm->avail_out--;
+      /* 64-bit total_out, carried manually across 32-bit halves */
+      s->strm->total_out_lo32++;
+      if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+   }
+
+   return moved_any;
+}
+
+
+/*---------------------------------------------------*/
+/* Main compression pump, shared by the RUN/FLUSH/FINISH cases of
+   BZ2_bzCompress.  Alternates between the two engine states:
+   BZ_S_OUTPUT drains the compressed form of the current block to
+   the caller; BZ_S_INPUT gathers input and compresses a block
+   when it fills (or when a flush/finish must force one out).
+   Returns True iff any input was consumed or output produced. */
+static
+Bool handle_compress ( bz_stream* strm )
+{
+   Bool progress_in  = False;
+   Bool progress_out = False;
+   EState* s = strm->state;
+   
+   while (True) {
+
+      if (s->state == BZ_S_OUTPUT) {
+         progress_out |= copy_output_until_stop ( s );
+         /* caller's buffer filled before the block was drained */
+         if (s->state_out_pos < s->numZ) break;
+         /* finishing, and nothing whatsoever left: done */
+         if (s->mode == BZ_M_FINISHING && 
+             s->avail_in_expect == 0 &&
+             isempty_RL(s)) break;
+         prepare_new_block ( s );
+         s->state = BZ_S_INPUT;
+         /* flush satisfied: stop without gathering new input */
+         if (s->mode == BZ_M_FLUSHING && 
+             s->avail_in_expect == 0 &&
+             isempty_RL(s)) break;
+      }
+
+      if (s->state == BZ_S_INPUT) {
+         progress_in |= copy_input_until_stop ( s );
+         if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
+            /* flush/finish: force out whatever has accumulated */
+            flush_RL ( s );
+            BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
+            s->state = BZ_S_OUTPUT;
+         }
+         else
+         if (s->nblock >= s->nblockMAX) {
+            /* block full: compress it, switch to draining */
+            BZ2_compressBlock ( s, False );
+            s->state = BZ_S_OUTPUT;
+         }
+         else
+         if (s->strm->avail_in == 0) {
+            /* no more input available right now */
+            break;
+         }
+      }
+
+   }
+
+   return progress_in || progress_out;
+}
+
+
+/*---------------------------------------------------*/
+/* Public compression entry point.  'action' is BZ_RUN, BZ_FLUSH
+   or BZ_FINISH; s->mode records which multi-call sequence is in
+   progress, and the same action must be repeated until that
+   sequence completes.  Returns BZ_RUN_OK / BZ_FLUSH_OK /
+   BZ_FINISH_OK while work remains, BZ_STREAM_END when a finish
+   completes, or an error code. */
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
+{
+   Bool progress;
+   EState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;
+
+   preswitch:
+   switch (s->mode) {
+
+      case BZ_M_IDLE:
+         return BZ_SEQUENCE_ERROR;
+
+      case BZ_M_RUNNING:
+         if (action == BZ_RUN) {
+            progress = handle_compress ( strm );
+            return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
+         } 
+         else
+	 if (action == BZ_FLUSH) {
+            /* record how much input this flush must cover, then
+               re-dispatch into the BZ_M_FLUSHING case */
+            s->avail_in_expect = strm->avail_in;
+            s->mode = BZ_M_FLUSHING;
+            goto preswitch;
+         }
+         else
+         if (action == BZ_FINISH) {
+            s->avail_in_expect = strm->avail_in;
+            s->mode = BZ_M_FINISHING;
+            goto preswitch;
+         }
+         else 
+            return BZ_PARAM_ERROR;
+
+      case BZ_M_FLUSHING:
+         if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
+         /* the caller may not add or remove input mid-flush */
+         if (s->avail_in_expect != s->strm->avail_in) 
+            return BZ_SEQUENCE_ERROR;
+         progress = handle_compress ( strm );
+         if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+             s->state_out_pos < s->numZ) return BZ_FLUSH_OK;
+         s->mode = BZ_M_RUNNING;
+         return BZ_RUN_OK;
+
+      case BZ_M_FINISHING:
+         if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
+         if (s->avail_in_expect != s->strm->avail_in) 
+            return BZ_SEQUENCE_ERROR;
+         progress = handle_compress ( strm );
+         /* a finish call that achieves nothing is a misuse */
+         if (!progress) return BZ_SEQUENCE_ERROR;
+         if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+             s->state_out_pos < s->numZ) return BZ_FINISH_OK;
+         s->mode = BZ_M_IDLE;
+         return BZ_STREAM_END;
+   }
+   return BZ_OK; /*--not reached--*/
+}
+
+
+/*---------------------------------------------------*/
+/* Release every allocation made for a compression stream and
+   detach the state from strm.  Returns BZ_PARAM_ERROR for a NULL
+   or inconsistent stream, BZ_OK otherwise. */
+int BZ_API(BZ2_bzCompressEnd)  ( bz_stream *strm )
+{
+   EState* s;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL || s->strm != strm) return BZ_PARAM_ERROR;
+
+   /* work arrays first, then the state record itself */
+   if (s->ftab != NULL) BZFREE(s->ftab);
+   if (s->arr2 != NULL) BZFREE(s->arr2);
+   if (s->arr1 != NULL) BZFREE(s->arr1);
+   BZFREE(strm->state);
+   strm->state = NULL;   
+
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/*--- Decompression stuff                         ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/* Set up strm for decompression.  'small' (0 or 1) selects the
+   low-memory decoder; verbosity must be 0..4.  Installs the
+   default allocators when the caller supplied none. */
+int BZ_API(BZ2_bzDecompressInit) 
+                     ( bz_stream* strm, 
+                       int        verbosity,
+                       int        small )
+{
+   DState* s;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+   /* validate all parameters up front */
+   if (strm == NULL
+       || (small != 0 && small != 1)
+       || verbosity < 0 || verbosity > 4)
+      return BZ_PARAM_ERROR;
+
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+   if (strm->bzfree  == NULL) strm->bzfree  = default_bzfree;
+
+   s = BZALLOC( sizeof(DState) );
+   if (s == NULL) return BZ_MEM_ERROR;
+
+   /* link state and stream both ways */
+   s->strm     = strm;
+   strm->state = s;
+
+   /* decoder begins by expecting the stream magic bytes */
+   s->state                 = BZ_X_MAGIC_1;
+   s->bsLive                = 0;
+   s->bsBuff                = 0;
+   s->calculatedCombinedCRC = 0;
+   s->smallDecompress       = (Bool)small;
+   s->verbosity             = verbosity;
+   s->currBlockNo           = 0;
+
+   /* decompression work areas start out unallocated */
+   s->tt   = NULL;
+   s->ll16 = NULL;
+   s->ll4  = NULL;
+
+   strm->total_in_lo32  = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_out_hi32 = 0;
+
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/* Run-length-decode the current block into the caller's output
+   buffer -- fast-decoder variant (BZ_GET_FAST / the tt array).
+   Runs are encoded as 1..4 equal bytes, a 4th equal byte being
+   followed by a length byte holding (run length - 4).
+   Return  True iff data corruption is discovered.
+   Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+   UChar k1;
+
+   if (s->blockRandomised) {
+
+      /* randomised blocks: every decoded byte is XORed with the
+         derandomisation mask (BZ_RAND_*) -- the slow path */
+      while (True) {
+         /* try to finish existing run */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+
+         /* can a new run be started? */
+         if (s->nblock_used == s->save_nblock+1) return False;
+               
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 2;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 3;
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         /* fourth equal byte: next byte is the extra run length */
+         BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;
+         BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; 
+         s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+      }
+
+   } else {
+
+      /* Non-randomised (normal) path: cache the hot stream/state
+         fields in locals for speed, and write them back at exit.
+         The s_state_out_len_eq_one label is the inlined tail for
+         emitting a single-byte run. */
+
+      /* restore */
+      UInt32        c_calculatedBlockCRC = s->calculatedBlockCRC;
+      UChar         c_state_out_ch       = s->state_out_ch;
+      Int32         c_state_out_len      = s->state_out_len;
+      Int32         c_nblock_used        = s->nblock_used;
+      Int32         c_k0                 = s->k0;
+      UInt32*       c_tt                 = s->tt;
+      UInt32        c_tPos               = s->tPos;
+      char*         cs_next_out          = s->strm->next_out;
+      unsigned int  cs_avail_out         = s->strm->avail_out;
+      /* end restore */
+
+      UInt32       avail_out_INIT = cs_avail_out;
+      Int32        s_save_nblockPP = s->save_nblock+1;
+      unsigned int total_out_lo32_old;
+
+      while (True) {
+
+         /* try to finish existing run */
+         if (c_state_out_len > 0) {
+            while (True) {
+               if (cs_avail_out == 0) goto return_notr;
+               if (c_state_out_len == 1) break;
+               *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+               BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+               c_state_out_len--;
+               cs_next_out++;
+               cs_avail_out--;
+            }
+            s_state_out_len_eq_one:
+            {
+               if (cs_avail_out == 0) { 
+                  c_state_out_len = 1; goto return_notr;
+               };
+               *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+               BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+               cs_next_out++;
+               cs_avail_out--;
+            }
+         }   
+         /* Only caused by corrupt data stream? */
+         if (c_nblock_used > s_save_nblockPP)
+            return True;
+
+         /* can a new run be started? */
+         if (c_nblock_used == s_save_nblockPP) {
+            c_state_out_len = 0; goto return_notr;
+         };   
+         c_state_out_ch = c_k0;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (k1 != c_k0) { 
+            c_k0 = k1; goto s_state_out_len_eq_one; 
+         };
+         if (c_nblock_used == s_save_nblockPP) 
+            goto s_state_out_len_eq_one;
+   
+         c_state_out_len = 2;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };
+   
+         c_state_out_len = 3;
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         if (c_nblock_used == s_save_nblockPP) continue;
+         if (k1 != c_k0) { c_k0 = k1; continue; };
+   
+         /* fourth equal byte: next byte is the extra run length */
+         BZ_GET_FAST_C(k1); c_nblock_used++;
+         c_state_out_len = ((Int32)k1) + 4;
+         BZ_GET_FAST_C(c_k0); c_nblock_used++;
+      }
+
+      return_notr:
+      /* account the bytes emitted this call in the 64-bit total */
+      total_out_lo32_old = s->strm->total_out_lo32;
+      s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+      if (s->strm->total_out_lo32 < total_out_lo32_old)
+         s->strm->total_out_hi32++;
+
+      /* save */
+      s->calculatedBlockCRC = c_calculatedBlockCRC;
+      s->state_out_ch       = c_state_out_ch;
+      s->state_out_len      = c_state_out_len;
+      s->nblock_used        = c_nblock_used;
+      s->k0                 = c_k0;
+      s->tt                 = c_tt;
+      s->tPos               = c_tPos;
+      s->strm->next_out     = cs_next_out;
+      s->strm->avail_out    = cs_avail_out;
+      /* end save */
+   }
+   return False;
+}
+
+
+
+/*---------------------------------------------------*/
+/* Run-length-decode the current block into the caller's output
+   buffer -- low-memory variant (BZ_GET_SMALL / ll16+ll4 arrays).
+   Same RLE scheme as the FAST variant.
+   Return  True iff data corruption is discovered.
+   Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+   UChar k1;
+
+   if (s->blockRandomised) {
+
+      /* randomised blocks: decoded bytes are XORed with the
+         derandomisation mask (BZ_RAND_*) */
+      while (True) {
+         /* try to finish existing run */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+   
+         /* can a new run be started? */
+         if (s->nblock_used == s->save_nblock+1) return False;
+
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 2;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 3;
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         /* fourth equal byte: next byte is the extra run length */
+         BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 
+         k1 ^= BZ_RAND_MASK; s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;
+         BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; 
+         s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+      }
+
+   } else {
+
+      /* normal (non-randomised) path: same logic minus the mask */
+      while (True) {
+         /* try to finish existing run */
+         while (True) {
+            if (s->strm->avail_out == 0) return False;
+            if (s->state_out_len == 0) break;
+            *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+            BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+            s->state_out_len--;
+            s->strm->next_out++;
+            s->strm->avail_out--;
+            s->strm->total_out_lo32++;
+            if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+         }
+   
+         /* can a new run be started? */
+         if (s->nblock_used == s->save_nblock+1) return False;
+
+         /* Only caused by corrupt data stream? */
+         if (s->nblock_used > s->save_nblock+1)
+            return True;
+   
+         s->state_out_len = 1;
+         s->state_out_ch = s->k0;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 2;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         s->state_out_len = 3;
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         if (s->nblock_used == s->save_nblock+1) continue;
+         if (k1 != s->k0) { s->k0 = k1; continue; };
+   
+         BZ_GET_SMALL(k1); s->nblock_used++;
+         s->state_out_len = ((Int32)k1) + 4;
+         BZ_GET_SMALL(s->k0); s->nblock_used++;
+      }
+
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Public decompression entry point.  Loops between un-RLE-ing an
+   already-decoded block into the caller's buffer (BZ_X_OUTPUT)
+   and decoding more compressed input (BZ2_decompress), checking
+   the per-block CRC at each block boundary and the combined CRC
+   at end of stream.  Returns BZ_OK while more work remains,
+   BZ_STREAM_END on verified completion, else an error code. */
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
+{
+   Bool    corrupt;
+   DState* s;
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL) return BZ_PARAM_ERROR;
+   if (s->strm != strm) return BZ_PARAM_ERROR;
+
+   while (True) {
+      if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
+      if (s->state == BZ_X_OUTPUT) {
+         if (s->smallDecompress)
+            corrupt = unRLE_obuf_to_output_SMALL ( s ); else
+            corrupt = unRLE_obuf_to_output_FAST  ( s );
+         if (corrupt) return BZ_DATA_ERROR;
+         if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) {
+            /* block fully emitted: verify its CRC and fold it into
+               the running combined CRC */
+            BZ_FINALISE_CRC ( s->calculatedBlockCRC );
+            if (s->verbosity >= 3) 
+               VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, 
+                          s->calculatedBlockCRC );
+            if (s->verbosity >= 2) VPrintf0 ( "]" );
+            if (s->calculatedBlockCRC != s->storedBlockCRC)
+               return BZ_DATA_ERROR;
+            /* combinedCRC = rotate-left-1(combinedCRC) ^ blockCRC */
+            s->calculatedCombinedCRC 
+               = (s->calculatedCombinedCRC << 1) | 
+                    (s->calculatedCombinedCRC >> 31);
+            s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
+            s->state = BZ_X_BLKHDR_1;
+         } else {
+            /* output buffer filled before the block was finished */
+            return BZ_OK;
+         }
+      }
+      if (s->state >= BZ_X_MAGIC_1) {
+         Int32 r = BZ2_decompress ( s );
+         if (r == BZ_STREAM_END) {
+            if (s->verbosity >= 3)
+               VPrintf2 ( "\n    combined CRCs: stored = 0x%08x, computed = 0x%08x", 
+                          s->storedCombinedCRC, s->calculatedCombinedCRC );
+            if (s->calculatedCombinedCRC != s->storedCombinedCRC)
+               return BZ_DATA_ERROR;
+            return r;
+         }
+         if (s->state != BZ_X_OUTPUT) return r;
+      }
+   }
+
+   AssertH ( 0, 6001 );
+
+   return 0;  /*NOTREACHED*/
+}
+
+
+/*---------------------------------------------------*/
+/* Free all memory held by a decompression stream and detach the
+   state from strm.  BZ_PARAM_ERROR for a NULL or inconsistent
+   stream, BZ_OK otherwise. */
+int BZ_API(BZ2_bzDecompressEnd)  ( bz_stream *strm )
+{
+   DState* s;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL || s->strm != strm) return BZ_PARAM_ERROR;
+
+   /* the work areas may never have been allocated; free what is */
+   if (s->ll4  != NULL) BZFREE(s->ll4);
+   if (s->ll16 != NULL) BZFREE(s->ll16);
+   if (s->tt   != NULL) BZFREE(s->tt);
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+
+   return BZ_OK;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+/*--- File I/O stuff                              ---*/
+/*---------------------------------------------------*/
+
+/* Record error code eee both in the caller's *bzerror (if given)
+   and in the handle's lastErr field (if the handle exists).
+   Relies on locals named 'bzerror' and 'bzf' at the use site. */
+#define BZ_SETERR(eee)                    \
+{                                         \
+   if (bzerror != NULL) *bzerror = eee;   \
+   if (bzf != NULL) bzf->lastErr = eee;   \
+}
+
+/* Per-handle state for the stdio-based convenience interface. */
+typedef 
+   struct {
+      FILE*     handle;              /* underlying stdio stream */
+      Char      buf[BZ_MAX_UNUSED];  /* staging buffer between strm and handle */
+      Int32     bufN;                /* number of valid bytes in buf */
+      Bool      writing;             /* True: write/compress mode; False: read */
+      bz_stream strm;                /* embedded low-level stream state */
+      Int32     lastErr;             /* last code recorded via BZ_SETERR */
+      Bool      initialisedOk;       /* True once (de)compress init succeeded */
+   }
+   bzFile;
+
+
+/*---------------------------------------------*/
+/* Non-destructive end-of-file test: peek one byte and, if one is
+   there, push it back.  True iff f is at end of file. */
+static Bool myfeof ( FILE* f )
+{
+   Int32 peeked = fgetc ( f );
+   if (peeked == EOF) return True;
+   ungetc ( peeked, f );
+   return False;
+}
+
+
+/*---------------------------------------------------*/
+/* Open a compression handle writing to stdio stream f.
+   blockSize100k must be 1..9, verbosity 0..4, workFactor 0..250
+   (0 selects the default of 30).  Reports trouble via *bzerror
+   and returns NULL on failure; otherwise returns the new handle. */
+BZFILE* BZ_API(BZ2_bzWriteOpen) 
+                    ( int*  bzerror,      
+                      FILE* f, 
+                      int   blockSize100k, 
+                      int   verbosity,
+                      int   workFactor )
+{
+   Int32   ret;
+   bzFile* bzf = NULL;
+
+   BZ_SETERR(BZ_OK);
+
+   if (f == NULL ||
+       (blockSize100k < 1 || blockSize100k > 9) ||
+       (workFactor < 0 || workFactor > 250) ||
+       (verbosity < 0 || verbosity > 4))
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+   if (ferror(f))
+      { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL)
+      { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+   BZ_SETERR(BZ_OK);
+   bzf->initialisedOk = False;
+   bzf->bufN          = 0;
+   bzf->handle        = f;
+   bzf->writing       = True;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+   bzf->strm.opaque   = NULL;
+
+   /* workFactor 0 means "use the library default" */
+   if (workFactor == 0) workFactor = 30;
+   ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, 
+                              verbosity, workFactor );
+   if (ret != BZ_OK)
+      { BZ_SETERR(ret); free(bzf); return NULL; };
+
+   bzf->strm.avail_in = 0;
+   bzf->initialisedOk = True;
+   return bzf;   
+}
+
+
+
+/*---------------------------------------------------*/
+/* Compress len bytes from buf to the handle's file.  Repeatedly
+   compresses into bzf->buf and fwrites each produced chunk until
+   the compressor has consumed all of buf.  Errors are reported
+   via *bzerror / bzf->lastErr. */
+void BZ_API(BZ2_bzWrite)
+             ( int*    bzerror, 
+               BZFILE* b, 
+               void*   buf, 
+               int     len )
+{
+   Int32 n, n2, ret;
+   bzFile* bzf = (bzFile*)b;
+
+   BZ_SETERR(BZ_OK);
+   if (bzf == NULL || buf == NULL || len < 0)
+      { BZ_SETERR(BZ_PARAM_ERROR); return; };
+   if (!(bzf->writing))
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+   if (ferror(bzf->handle))
+      { BZ_SETERR(BZ_IO_ERROR); return; };
+
+   if (len == 0)
+      { BZ_SETERR(BZ_OK); return; };
+
+   bzf->strm.avail_in = len;
+   bzf->strm.next_in  = buf;
+
+   while (True) {
+      bzf->strm.avail_out = BZ_MAX_UNUSED;
+      bzf->strm.next_out = bzf->buf;
+      ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
+      if (ret != BZ_RUN_OK)
+         { BZ_SETERR(ret); return; };
+
+      /* write out whatever the compressor produced this pass */
+      if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+         n = BZ_MAX_UNUSED - bzf->strm.avail_out;
+         n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 
+                       n, bzf->handle );
+         if (n != n2 || ferror(bzf->handle))
+            { BZ_SETERR(BZ_IO_ERROR); return; };
+      }
+
+      if (bzf->strm.avail_in == 0)
+         { BZ_SETERR(BZ_OK); return; };
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Backward-compatible close for write handles: identical to
+   BZ2_bzWriteClose64 but reports only the low 32 bits of the
+   byte counters. */
+void BZ_API(BZ2_bzWriteClose)
+                  ( int*          bzerror, 
+                    BZFILE*       b, 
+                    int           abandon,
+                    unsigned int* nbytes_in,
+                    unsigned int* nbytes_out )
+{
+   BZ2_bzWriteClose64 ( bzerror, b, abandon, 
+                        nbytes_in, NULL, nbytes_out, NULL );
+}
+
+
+/* Finish a write handle: unless 'abandon' is set, drive the
+   compressor with BZ_FINISH until BZ_STREAM_END, writing each
+   produced chunk, then fflush the file.  64-bit input/output
+   byte counts are reported through any non-NULL out-parameters.
+   Finally releases the compressor state and the handle itself. */
+void BZ_API(BZ2_bzWriteClose64)
+                  ( int*          bzerror, 
+                    BZFILE*       b, 
+                    int           abandon,
+                    unsigned int* nbytes_in_lo32,
+                    unsigned int* nbytes_in_hi32,
+                    unsigned int* nbytes_out_lo32,
+                    unsigned int* nbytes_out_hi32 )
+{
+   Int32   n, n2, ret;
+   bzFile* bzf = (bzFile*)b;
+
+   if (bzf == NULL)
+      { BZ_SETERR(BZ_OK); return; };
+   if (!(bzf->writing))
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+   if (ferror(bzf->handle))
+      { BZ_SETERR(BZ_IO_ERROR); return; };
+
+   if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+   if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+   if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+   if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
+
+   /* finish the stream properly only if nothing has failed so far */
+   if ((!abandon) && bzf->lastErr == BZ_OK) {
+      while (True) {
+         bzf->strm.avail_out = BZ_MAX_UNUSED;
+         bzf->strm.next_out = bzf->buf;
+         ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
+         if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+            { BZ_SETERR(ret); return; };
+
+         if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+            n = BZ_MAX_UNUSED - bzf->strm.avail_out;
+            n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 
+                          n, bzf->handle );
+            if (n != n2 || ferror(bzf->handle))
+               { BZ_SETERR(BZ_IO_ERROR); return; };
+         }
+
+         if (ret == BZ_STREAM_END) break;
+      }
+   }
+
+   if ( !abandon && !ferror ( bzf->handle ) ) {
+      fflush ( bzf->handle );
+      if (ferror(bzf->handle))
+         { BZ_SETERR(BZ_IO_ERROR); return; };
+   }
+
+   if (nbytes_in_lo32 != NULL)
+      *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+   if (nbytes_in_hi32 != NULL)
+      *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+   if (nbytes_out_lo32 != NULL)
+      *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+   if (nbytes_out_hi32 != NULL)
+      *nbytes_out_hi32 = bzf->strm.total_out_hi32;
+
+   BZ_SETERR(BZ_OK);
+   BZ2_bzCompressEnd ( &(bzf->strm) );
+   free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+/* Open a decompression handle reading from stdio stream f.
+   'small' (0/1) selects the low-memory decoder.  Up to
+   BZ_MAX_UNUSED bytes of compressed data already read from f may
+   be handed back through unused/nUnused (as obtained from
+   BZ2_bzReadGetUnused on a previous stream).  Reports trouble
+   via *bzerror and returns NULL on failure. */
+BZFILE* BZ_API(BZ2_bzReadOpen) 
+                   ( int*  bzerror, 
+                     FILE* f, 
+                     int   verbosity,
+                     int   small,
+                     void* unused,
+                     int   nUnused )
+{
+   bzFile* bzf = NULL;
+   int     ret;
+
+   BZ_SETERR(BZ_OK);
+
+   if (f == NULL || 
+       (small != 0 && small != 1) ||
+       (verbosity < 0 || verbosity > 4) ||
+       (unused == NULL && nUnused != 0) ||
+       (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED)))
+      { BZ_SETERR(BZ_PARAM_ERROR); return NULL; };
+
+   if (ferror(f))
+      { BZ_SETERR(BZ_IO_ERROR); return NULL; };
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL) 
+      { BZ_SETERR(BZ_MEM_ERROR); return NULL; };
+
+   BZ_SETERR(BZ_OK);
+
+   bzf->initialisedOk = False;
+   bzf->handle        = f;
+   bzf->bufN          = 0;
+   bzf->writing       = False;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+   bzf->strm.opaque   = NULL;
+   
+   /* seed the input buffer with the caller's leftover bytes */
+   while (nUnused > 0) {
+      bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++;
+      unused = ((void*)( 1 + ((UChar*)(unused))  ));
+      nUnused--;
+   }
+
+   ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
+   if (ret != BZ_OK)
+      { BZ_SETERR(ret); free(bzf); return NULL; };
+
+   bzf->strm.avail_in = bzf->bufN;
+   bzf->strm.next_in  = bzf->buf;
+
+   bzf->initialisedOk = True;
+   return bzf;   
+}
+
+
+/*---------------------------------------------------*/
+/* Close a handle opened with BZ2_bzReadOpen, releasing the
+   decompressor (if it was ever set up) and the bzFile record. */
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
+{
+   bzFile* bzf = (bzFile*)b;
+
+   BZ_SETERR(BZ_OK);
+
+   /* closing a NULL handle is a no-op, not an error */
+   if (bzf == NULL) { BZ_SETERR(BZ_OK); return; }
+
+   /* write-mode handles must go through BZ2_bzWriteClose instead */
+   if (bzf->writing) { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }
+
+   /* only tear down the decompressor if its init succeeded */
+   if (bzf->initialisedOk)
+      (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
+   free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+/* Decompress up to len bytes into buf, refilling the handle's
+   input buffer from the file as needed.  Returns the number of
+   bytes produced; *bzerror is BZ_OK while the caller should keep
+   reading, BZ_STREAM_END once the logical stream has ended. */
+int BZ_API(BZ2_bzRead) 
+           ( int*    bzerror, 
+             BZFILE* b, 
+             void*   buf, 
+             int     len )
+{
+   Int32   n, ret;
+   bzFile* bzf = (bzFile*)b;
+
+   BZ_SETERR(BZ_OK);
+
+   if (bzf == NULL || buf == NULL || len < 0)
+      { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
+
+   if (bzf->writing)
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
+
+   if (len == 0)
+      { BZ_SETERR(BZ_OK); return 0; };
+
+   bzf->strm.avail_out = len;
+   bzf->strm.next_out = buf;
+
+   while (True) {
+
+      if (ferror(bzf->handle)) 
+         { BZ_SETERR(BZ_IO_ERROR); return 0; };
+
+      /* refill the staging buffer when it empties */
+      if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) {
+         n = fread ( bzf->buf, sizeof(UChar), 
+                     BZ_MAX_UNUSED, bzf->handle );
+         if (ferror(bzf->handle))
+            { BZ_SETERR(BZ_IO_ERROR); return 0; };
+         bzf->bufN = n;
+         bzf->strm.avail_in = bzf->bufN;
+         bzf->strm.next_in = bzf->buf;
+      }
+
+      ret = BZ2_bzDecompress ( &(bzf->strm) );
+
+      if (ret != BZ_OK && ret != BZ_STREAM_END)
+         { BZ_SETERR(ret); return 0; };
+
+      /* file exhausted before the stream logically ended */
+      if (ret == BZ_OK && myfeof(bzf->handle) && 
+          bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
+         { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; };
+
+      if (ret == BZ_STREAM_END)
+         { BZ_SETERR(BZ_STREAM_END);
+           return len - bzf->strm.avail_out; };
+      if (bzf->strm.avail_out == 0)
+         { BZ_SETERR(BZ_OK); return len; };
+      
+   }
+
+   return 0; /*not reached*/
+}
+
+
+/*---------------------------------------------------*/
+/* After BZ2_bzRead has reported BZ_STREAM_END, expose any bytes
+   that were read from the file but lie beyond the end of the
+   logical stream, so the caller can feed them to a subsequent
+   BZ2_bzReadOpen.  The returned pointer aliases the handle's own
+   buffer, so it is only valid until BZ2_bzReadClose. */
+void BZ_API(BZ2_bzReadGetUnused) 
+                     ( int*    bzerror, 
+                       BZFILE* b, 
+                       void**  unused, 
+                       int*    nUnused )
+{
+   bzFile* bzf = (bzFile*)b;
+   if (bzf == NULL)
+      { BZ_SETERR(BZ_PARAM_ERROR); return; };
+   if (bzf->lastErr != BZ_STREAM_END)
+      { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+   if (unused == NULL || nUnused == NULL)
+      { BZ_SETERR(BZ_PARAM_ERROR); return; };
+
+   BZ_SETERR(BZ_OK);
+   *nUnused = bzf->strm.avail_in;
+   *unused = bzf->strm.next_in;
+}
+#endif
+
+
+/*---------------------------------------------------*/
+/*--- Misc convenience stuff                      ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/* One-shot compression of a memory buffer.  On entry *destLen is
+   the capacity of dest; on success it becomes the number of bytes
+   actually written.  Returns BZ_OUTBUFF_FULL when dest is too
+   small, otherwise the usual bzlib result codes. */
+int BZ_API(BZ2_bzBuffToBuffCompress) 
+                         ( char*         dest, 
+                           unsigned int* destLen,
+                           char*         source, 
+                           unsigned int  sourceLen,
+                           int           blockSize100k, 
+                           int           verbosity, 
+                           int           workFactor )
+{
+   bz_stream strm;
+   int ret;
+
+   if (dest == NULL || destLen == NULL || source == NULL
+       || blockSize100k < 1 || blockSize100k > 9
+       || verbosity < 0 || verbosity > 4
+       || workFactor < 0 || workFactor > 250)
+      return BZ_PARAM_ERROR;
+
+   /* 0 selects the library's default work factor */
+   if (workFactor == 0) workFactor = 30;
+
+   strm.bzalloc = NULL;
+   strm.bzfree  = NULL;
+   strm.opaque  = NULL;
+   ret = BZ2_bzCompressInit ( &strm, blockSize100k, 
+                              verbosity, workFactor );
+   if (ret != BZ_OK) return ret;
+
+   strm.next_in   = source;
+   strm.avail_in  = sourceLen;
+   strm.next_out  = dest;
+   strm.avail_out = *destLen;
+
+   /* one BZ_FINISH either completes or runs out of output room */
+   ret = BZ2_bzCompress ( &strm, BZ_FINISH );
+
+   if (ret == BZ_STREAM_END) {
+      /* normal termination */
+      *destLen -= strm.avail_out;
+      BZ2_bzCompressEnd ( &strm );
+      return BZ_OK;
+   }
+
+   BZ2_bzCompressEnd ( &strm );
+   return (ret == BZ_FINISH_OK) ? BZ_OUTBUFF_FULL : ret;
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzBuffToBuffDecompress) 
+                           ( char*         dest, 
+                             unsigned int* destLen,
+                             char*         source, 
+                             unsigned int  sourceLen,
+                             int           small,
+                             int           verbosity )
+{
+   bz_stream strm;
+   int ret;
+
+   if (dest == NULL || destLen == NULL || 
+       source == NULL ||
+       (small != 0 && small != 1) ||
+       verbosity < 0 || verbosity > 4) 
+          return BZ_PARAM_ERROR;
+
+   strm.bzalloc = NULL;
+   strm.bzfree = NULL;
+   strm.opaque = NULL;
+   ret = BZ2_bzDecompressInit ( &strm, verbosity, small );
+   if (ret != BZ_OK) return ret;
+
+   strm.next_in = source;
+   strm.next_out = dest;
+   strm.avail_in = sourceLen;
+   strm.avail_out = *destLen;
+
+   ret = BZ2_bzDecompress ( &strm );
+   if (ret == BZ_OK) goto output_overflow_or_eof;
+   if (ret != BZ_STREAM_END) goto errhandler;
+
+   /* normal termination */
+   *destLen -= strm.avail_out;
+   BZ2_bzDecompressEnd ( &strm );
+   return BZ_OK;
+
+   output_overflow_or_eof:
+   if (strm.avail_out > 0) {
+      BZ2_bzDecompressEnd ( &strm );
+      return BZ_UNEXPECTED_EOF;
+   } else {
+      BZ2_bzDecompressEnd ( &strm );
+      return BZ_OUTBUFF_FULL;
+   };      
+
+   errhandler:
+   BZ2_bzDecompressEnd ( &strm );
+   return ret; 
+}
+
+
+/*---------------------------------------------------*/
+/*--
+   Code contributed by Yoshioka Tsuneo
+   (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp),
+   to support better zlib compatibility.
+   This code is not _officially_ part of libbzip2 (yet);
+   I haven't tested it, documented it, or considered the
+   threading-safeness of it.
+   If this code breaks, please contact both Yoshioka and me.
+--*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*--
+   return version like "0.9.0c".
+--*/
+const char * BZ_API(BZ2_bzlibVersion)(void)
+{
+   return BZ_VERSION;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+
/* On platforms that distinguish text and binary streams, force the
   FILE into binary mode; elsewhere this is a no-op. */
#if defined(_WIN32) || defined(OS2) || defined(MSDOS)
#   include <fcntl.h>
#   include <io.h>
#   define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
#else
#   define SET_BINARY_MODE(file)
#endif
/* Shared worker for BZ2_bzopen and BZ2_bzdopen.  Parses a zlib-style
   mode string ('r' read, 'w' write, 's' small-memory, digit = block
   size in 100k units; if several digits appear the last one wins),
   obtains a FILE (named file, stdin/stdout, or an existing fd), and
   wraps it in a BZFILE via BZ2_bzReadOpen/BZ2_bzWriteOpen.
   Returns NULL on any failure. */
static
BZFILE * bzopen_or_bzdopen
               ( const char *path,   /* ignored when open_mode==1 (bzdopen) */
                 int fd,             /* ignored when open_mode==0 (bzopen)  */
                 const char *mode,
                 int open_mode)      /* bzopen: 0, bzdopen: 1 */
{
   int    bzerr;
   char   unused[BZ_MAX_UNUSED];   /* contents never consumed: nUnused stays 0 */
   int    blockSize100k = 9;
   int    writing       = 0;
   char   mode2[10]     = "";
   FILE   *fp           = NULL;
   BZFILE *bzfp         = NULL;
   int    verbosity     = 0;
   int    workFactor    = 30;
   int    smallMode     = 0;
   int    nUnused       = 0; 

   if (mode == NULL) return NULL;
   /* Scan the mode string; unrecognised non-digit characters are ignored. */
   while (*mode) {
      switch (*mode) {
      case 'r':
         writing = 0; break;
      case 'w':
         writing = 1; break;
      case 's':
         smallMode = 1; break;
      default:
         if (isdigit((int)(*mode))) {
            blockSize100k = *mode-BZ_HDR_0;   /* ASCII digit -> 0..9 */
         }
      }
      mode++;
   }
   /* Build the fopen/fdopen mode: "rb" or "wb". */
   strcat(mode2, writing ? "w" : "r" );
   strcat(mode2,"b");   /* binary mode */

   if (open_mode==0) {
      /* bzopen: empty or NULL path selects stdin/stdout. */
      if (path==NULL || strcmp(path,"")==0) {
        fp = (writing ? stdout : stdin);
        SET_BINARY_MODE(fp);
      } else {
        fp = fopen(path,mode2);
      }
   } else {
      /* bzdopen: wrap the caller's descriptor. */
#ifdef BZ_STRICT_ANSI
      fp = NULL;   /* fdopen is not available under strict ANSI */
#else
      fp = fdopen(fd,mode2);
#endif
   }
   if (fp == NULL) return NULL;

   if (writing) {
      /* Guard against total chaos and anarchy -- JRS */
      if (blockSize100k < 1) blockSize100k = 1;
      if (blockSize100k > 9) blockSize100k = 9; 
      bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
                             verbosity,workFactor);
   } else {
      bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
                            unused,nUnused);
   }
   if (bzfp == NULL) {
      /* Wrapping failed: close the FILE we opened (but never std streams). */
      if (fp != stdin && fp != stdout) fclose(fp);
      return NULL;
   }
   return bzfp;
}
+
+
+/*---------------------------------------------------*/
+/*--
+   open file for read or write.
+      ex) bzopen("file","w9")
+      case path="" or NULL => use stdin or stdout.
+--*/
+BZFILE * BZ_API(BZ2_bzopen)
+               ( const char *path,
+                 const char *mode )
+{
+   return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
+}
+
+
+/*---------------------------------------------------*/
+BZFILE * BZ_API(BZ2_bzdopen)
+               ( int fd,
+                 const char *mode )
+{
+   return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
+{
+   int bzerr, nread;
+   if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
+   nread = BZ2_bzRead(&bzerr,b,buf,len);
+   if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
+      return nread;
+   } else {
+      return -1;
+   }
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
+{
+   int bzerr;
+
+   BZ2_bzWrite(&bzerr,b,buf,len);
+   if(bzerr == BZ_OK){
+      return len;
+   }else{
+      return -1;
+   }
+}
+
+
+/*---------------------------------------------------*/
+int BZ_API(BZ2_bzflush) (BZFILE *b)
+{
+   /* do nothing now... */
+   return 0;
+}
+
+
+/*---------------------------------------------------*/
+void BZ_API(BZ2_bzclose) (BZFILE* b)
+{
+   int bzerr;
+   FILE *fp = ((bzFile *)b)->handle;
+   
+   if (b==NULL) {return;}
+   if(((bzFile*)b)->writing){
+      BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL);
+      if(bzerr != BZ_OK){
+         BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
+      }
+   }else{
+      BZ2_bzReadClose(&bzerr,b);
+   }
+   if(fp!=stdin && fp!=stdout){
+      fclose(fp);
+   }
+}
+
+
+/*---------------------------------------------------*/
+/*--
+   return last error code 
+--*/
/* Printable names for the BZ_* status codes, indexed by the negated
   (non-positive) error code.  The trailing "???" entries are padding
   for codes that may be added in the future. */
static char *bzerrorstrings[] = {
       "OK"
      ,"SEQUENCE_ERROR"
      ,"PARAM_ERROR"
      ,"MEM_ERROR"
      ,"DATA_ERROR"
      ,"DATA_ERROR_MAGIC"
      ,"IO_ERROR"
      ,"UNEXPECTED_EOF"
      ,"OUTBUFF_FULL"
      ,"CONFIG_ERROR"
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
};
+
+
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
+{
+   int err = ((bzFile *)b)->lastErr;
+
+   if(err>0) err = 0;
+   *errnum = err;
+   return bzerrorstrings[err*-1];
+}
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end                                           bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+
+/////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////
+
+
+/* A test program written to test robustness to decompression of
+   corrupted data.  Usage is 
+       unzcrash filename
+   and the program will read the specified file, compress it (in memory),
+   and then repeatedly decompress it, each time with a different bit of
+   the compressed data inverted, so as to test all possible one-bit errors.
+   This should not cause any invalid memory accesses.  If it does, 
+   I want to know about it!
+
+   p.s.  As you can see from the above description, the process is
+   incredibly slow.  A file of size eg 5KB will cause it to run for
+   many hours.
+*/
+
//#include <stdio.h>
//#include <assert.h>
//#include "bzlib.h"

/* Size of the uncompressed test data buffer. */
#define M_BLOCK 1000000


/* Decompression target is oversized so corrupted input cannot overrun it. */
#define M_BLOCK_OUT (M_BLOCK + 1000000)
 char inbuf[M_BLOCK];    /* original (uncompressed) test data */
 char outbuf[M_BLOCK_OUT];    /* decompression output */
 char zbuf[M_BLOCK + 600 + (M_BLOCK / 100)];  /* compressed data; input + 1% + 600 slack */

int nIn;            /* bytes of test data in inbuf */
unsigned int nOut;  /* bytes produced by decompression */
unsigned int nZ;    /* bytes of compressed data in zbuf */

/* Disabled duplicate of the bzerrorstrings table defined earlier in this
   file; kept (inert) from the original standalone unzcrash.c. */
#if 0
static char *bzerrorstrings[] = {
       "OK"
      ,"SEQUENCE_ERROR"
      ,"PARAM_ERROR"
      ,"MEM_ERROR"
      ,"DATA_ERROR"
      ,"DATA_ERROR_MAGIC"
      ,"IO_ERROR"
      ,"UNEXPECTED_EOF"
      ,"OUTBUFF_FULL"
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
      ,"???"   /* for future */
};
#endif
+
+void flip_bit ( int bit )
+{
+   int byteno = bit / 8;
+   int bitno  = bit % 8;
+   UChar mask = 1 << bitno;
+   //fprintf ( stderr, "(byte %d  bit %d  mask %d)",
+   //          byteno, bitno, (int)mask );
+   zbuf[byteno] ^= mask;
+}
+
+void set_inbuf ( void )
+{
+  inbuf[0] = 0;
+  my_strcat(inbuf, "At her sixtieth birthday party, Margaret Thatcher ");
+  my_strcat(inbuf, "blew on the cake to light the candles.\n");
+  my_strcat(inbuf, "This program, bzip2, the associated library libbzip2, and all\n");
+  my_strcat(inbuf, "documentation, are copyright (C) 1996-2004 Julian R Seward.  All\n");
+  my_strcat(inbuf, "rights reserved.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "Redistribution and use in source and binary forms, with or without\n");
+  my_strcat(inbuf, "modification, are permitted provided that the following conditions\n");
+  my_strcat(inbuf, "are met:\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "1. Redistributions of source code must retain the above copyright\n");
+  my_strcat(inbuf, "   notice, this list of conditions and the following disclaimer.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "2. The origin of this software must not be misrepresented; you must\n");
+  my_strcat(inbuf, "   not claim that you wrote the original software.  If you use this\n");
+  my_strcat(inbuf, "   software in a product, an acknowledgment in the product\n");
+  my_strcat(inbuf, "   documentation would be appreciated but is not required.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "3. Altered source versions must be plainly marked as such, and must\n");
+  my_strcat(inbuf, "   not be misrepresented as being the original software.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "4. The name of the author may not be used to endorse or promote\n");
+  my_strcat(inbuf, "   products derived from this software without specific prior written\n");
+  my_strcat(inbuf, "   permission.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS\n");
+  my_strcat(inbuf, "OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n");
+  my_strcat(inbuf, "WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n");
+  my_strcat(inbuf, "ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY\n");
+  my_strcat(inbuf, "DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n");
+  my_strcat(inbuf, "DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n");
+  my_strcat(inbuf, "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n");
+  my_strcat(inbuf, "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n");
+  my_strcat(inbuf, "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n");
+  my_strcat(inbuf, "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n");
+  my_strcat(inbuf, "SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "ababababababababababababababababababababababababababababababab");
+  my_strcat(inbuf, "		    GNU GENERAL PUBLIC LICENSE\n");
+  my_strcat(inbuf, "		       Version 2, June 1991\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, " Copyright (C) 1989, 1991 Free Software Foundation, Inc.\n");
+  my_strcat(inbuf, "     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\n");
+  my_strcat(inbuf, " Everyone is permitted to copy and distribute verbatim copies\n");
+  my_strcat(inbuf, " of this license document, but changing it is not allowed.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "			    Preamble\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  The licenses for most software are designed to take away your\n");
+  my_strcat(inbuf, "freedom to share and change it.  By contrast, the GNU General Public\n");
+  my_strcat(inbuf, "License is intended to guarantee your freedom to share and change free\n");
+  my_strcat(inbuf, "software--to make sure the software is free for all its users.  This\n");
+  my_strcat(inbuf, "General Public License applies to most of the Free Software\n");
+  my_strcat(inbuf, "Foundation's software and to any other program whose authors commit to\n");
+  my_strcat(inbuf, "using it.  (Some other Free Software Foundation software is covered by\n");
+  my_strcat(inbuf, "the GNU Library General Public License instead.)  You can apply it to\n");
+  my_strcat(inbuf, "your programs, too.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  When we speak of free software, we are referring to freedom, not\n");
+  my_strcat(inbuf, "price.  Our General Public Licenses are designed to make sure that you\n");
+  my_strcat(inbuf, "have the freedom to distribute copies of free software (and charge for\n");
+  my_strcat(inbuf, "this service if you wish), that you receive source code or can get it\n");
+  my_strcat(inbuf, "if you want it, that you can change the software or use pieces of it\n");
+  my_strcat(inbuf, "in new free programs; and that you know you can do these things.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  To protect your rights, we need to make restrictions that forbid\n");
+  my_strcat(inbuf, "anyone to deny you these rights or to ask you to surrender the rights.\n");
+  my_strcat(inbuf, "These restrictions translate to certain responsibilities for you if you\n");
+  my_strcat(inbuf, "distribute copies of the software, or if you modify it.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  For example, if you distribute copies of such a program, whether\n");
+  my_strcat(inbuf, "gratis or for a fee, you must give the recipients all the rights that\n");
+  my_strcat(inbuf, "you have.  You must make sure that they, too, receive or can get the\n");
+  my_strcat(inbuf, "source code.  And you must show them these terms so they know their\n");
+  my_strcat(inbuf, "rights.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  We protect your rights with two steps: (1) copyright the software, and\n");
+  my_strcat(inbuf, "(2) offer you this license which gives you legal permission to copy,\n");
+  my_strcat(inbuf, "distribute and/or modify the software.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  Also, for each author's protection and ours, we want to make certain\n");
+  my_strcat(inbuf, "that everyone understands that there is no warranty for this free\n");
+  my_strcat(inbuf, "software.  If the software is modified by someone else and passed on, we\n");
+  my_strcat(inbuf, "want its recipients to know that what they have is not the original, so\n");
+  my_strcat(inbuf, "that any problems introduced by others will not reflect on the original\n");
+  my_strcat(inbuf, "authors' reputations.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  Finally, any free program is threatened constantly by software\n");
+  my_strcat(inbuf, "patents.  We wish to avoid the danger that redistributors of a free\n");
+  my_strcat(inbuf, "program will individually obtain patent licenses, in effect making the\n");
+  my_strcat(inbuf, "program proprietary.  To prevent this, we have made it clear that any\n");
+  my_strcat(inbuf, "patent must be licensed for everyone's free use or not licensed at all.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  The precise terms and conditions for copying, distribution and\n");
+  my_strcat(inbuf, "modification follow.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "		    GNU GENERAL PUBLIC LICENSE\n");
+  my_strcat(inbuf, "   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  0. This License applies to any program or other work which contains\n");
+  my_strcat(inbuf, "a notice placed by the copyright holder saying it may be distributed\n");
+  my_strcat(inbuf, "under the terms of this General Public License.  The Program, below,\n");
+  my_strcat(inbuf, "refers to any such program or work, and a work based on the Program\n");
+  my_strcat(inbuf, "means either the Program or any derivative work under copyright law:\n");
+  my_strcat(inbuf, "that is to say, a work containing the Program or a portion of it,\n");
+  my_strcat(inbuf, "either verbatim or with modifications and/or translated into another\n");
+  my_strcat(inbuf, "language.  (Hereinafter, translation is included without limitation in\n");
+  my_strcat(inbuf, "the term modification.)  Each licensee is addressed as you.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "Activities other than copying, distribution and modification are not\n");
+  my_strcat(inbuf, "covered by this License; they are outside its scope.  The act of\n");
+  my_strcat(inbuf, "running the Program is not restricted, and the output from the Program\n");
+  my_strcat(inbuf, "is covered only if its contents constitute a work based on the\n");
+  my_strcat(inbuf, "Program (independent of having been made by running the Program).\n");
+  my_strcat(inbuf, "Whether that is true depends on what the Program does.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  1. You may copy and distribute verbatim copies of the Program's\n");
+  my_strcat(inbuf, "source code as you receive it, in any medium, provided that you\n");
+  my_strcat(inbuf, "conspicuously and appropriately publish on each copy an appropriate\n");
+  my_strcat(inbuf, "copyright notice and disclaimer of warranty; keep intact all the\n");
+  my_strcat(inbuf, "notices that refer to this License and to the absence of any warranty;\n");
+  my_strcat(inbuf, "and give any other recipients of the Program a copy of this License\n");
+  my_strcat(inbuf, "along with the Program.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "You may charge a fee for the physical act of transferring a copy, and\n");
+  my_strcat(inbuf, "you may at your option offer warranty protection in exchange for a fee.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  2. You may modify your copy or copies of the Program or any portion\n");
+  my_strcat(inbuf, "of it, thus forming a work based on the Program, and copy and\n");
+  my_strcat(inbuf, "distribute such modifications or work under the terms of Section 1\n");
+  my_strcat(inbuf, "above, provided that you also meet all of these conditions:\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    a) You must cause the modified files to carry prominent notices\n");
+  my_strcat(inbuf, "    stating that you changed the files and the date of any change.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    b) You must cause any work that you distribute or publish, that in\n");
+  my_strcat(inbuf, "    whole or in part contains or is derived from the Program or any\n");
+  my_strcat(inbuf, "    part thereof, to be licensed as a whole at no charge to all third\n");
+  my_strcat(inbuf, "    parties under the terms of this License.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    c) If the modified program normally reads commands interactively\n");
+  my_strcat(inbuf, "    when run, you must cause it, when started running for such\n");
+  my_strcat(inbuf, "    interactive use in the most ordinary way, to print or display an\n");
+  my_strcat(inbuf, "    announcement including an appropriate copyright notice and a\n");
+  my_strcat(inbuf, "    notice that there is no warranty (or else, saying that you provide\n");
+  my_strcat(inbuf, "    a warranty) and that users may redistribute the program under\n");
+  my_strcat(inbuf, "    these conditions, and telling the user how to view a copy of this\n");
+  my_strcat(inbuf, "    License.  (Exception: if the Program itself is interactive but\n");
+  my_strcat(inbuf, "    does not normally print such an announcement, your work based on\n");
+  my_strcat(inbuf, "    the Program is not required to print an announcement.)\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "These requirements apply to the modified work as a whole.  If\n");
+  my_strcat(inbuf, "identifiable sections of that work are not derived from the Program,\n");
+  my_strcat(inbuf, "and can be reasonably considered independent and separate works in\n");
+  my_strcat(inbuf, "themselves, then this License, and its terms, do not apply to those\n");
+  my_strcat(inbuf, "sections when you distribute them as separate works.  But when you\n");
+  my_strcat(inbuf, "distribute the same sections as part of a whole which is a work based\n");
+  my_strcat(inbuf, "on the Program, the distribution of the whole must be on the terms of\n");
+  my_strcat(inbuf, "this License, whose permissions for other licensees extend to the\n");
+  my_strcat(inbuf, "entire whole, and thus to each and every part regardless of who wrote it.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "Thus, it is not the intent of this section to claim rights or contest\n");
+  my_strcat(inbuf, "your rights to work written entirely by you; rather, the intent is to\n");
+  my_strcat(inbuf, "exercise the right to control the distribution of derivative or\n");
+  my_strcat(inbuf, "collective works based on the Program.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "In addition, mere aggregation of another work not based on the Program\n");
+  my_strcat(inbuf, "with the Program (or with a work based on the Program) on a volume of\n");
+  my_strcat(inbuf, "a storage or distribution medium does not bring the other work under\n");
+  my_strcat(inbuf, "the scope of this License.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  3. You may copy and distribute the Program (or a work based on it,\n");
+  my_strcat(inbuf, "under Section 2) in object code or executable form under the terms of\n");
+  my_strcat(inbuf, "Sections 1 and 2 above provided that you also do one of the following:\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    a) Accompany it with the complete corresponding machine-readable\n");
+  my_strcat(inbuf, "    source code, which must be distributed under the terms of Sections\n");
+  my_strcat(inbuf, "    1 and 2 above on a medium customarily used for software interchange; or,\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    b) Accompany it with a written offer, valid for at least three\n");
+  my_strcat(inbuf, "    years, to give any third party, for a charge no more than your\n");
+  my_strcat(inbuf, "    cost of physically performing source distribution, a complete\n");
+  my_strcat(inbuf, "    machine-readable copy of the corresponding source code, to be\n");
+  my_strcat(inbuf, "    distributed under the terms of Sections 1 and 2 above on a medium\n");
+  my_strcat(inbuf, "    customarily used for software interchange; or,\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    c) Accompany it with the information you received as to the offer\n");
+  my_strcat(inbuf, "    to distribute corresponding source code.  (This alternative is\n");
+  my_strcat(inbuf, "    allowed only for noncommercial distribution and only if you\n");
+  my_strcat(inbuf, "    received the program in object code or executable form with such\n");
+  my_strcat(inbuf, "    an offer, in accord with Subsection b above.)\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "The source code for a work means the preferred form of the work for\n");
+  my_strcat(inbuf, "making modifications to it.  For an executable work, complete source\n");
+  my_strcat(inbuf, "code means all the source code for all modules it contains, plus any\n");
+  my_strcat(inbuf, "associated interface definition files, plus the scripts used to\n");
+  my_strcat(inbuf, "control compilation and installation of the executable.  However, as a\n");
+  my_strcat(inbuf, "special exception, the source code distributed need not include\n");
+  my_strcat(inbuf, "anything that is normally distributed (in either source or binary\n");
+  my_strcat(inbuf, "form) with the major components (compiler, kernel, and so on) of the\n");
+  my_strcat(inbuf, "operating system on which the executable runs, unless that component\n");
+  my_strcat(inbuf, "itself accompanies the executable.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "If distribution of executable or object code is made by offering\n");
+  my_strcat(inbuf, "access to copy from a designated place, then offering equivalent\n");
+  my_strcat(inbuf, "access to copy the source code from the same place counts as\n");
+  my_strcat(inbuf, "distribution of the source code, even though third parties are not\n");
+  my_strcat(inbuf, "compelled to copy the source along with the object code.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  4. You may not copy, modify, sublicense, or distribute the Program\n");
+  my_strcat(inbuf, "except as expressly provided under this License.  Any attempt\n");
+  my_strcat(inbuf, "otherwise to copy, modify, sublicense or distribute the Program is\n");
+  my_strcat(inbuf, "void, and will automatically terminate your rights under this License.\n");
+  my_strcat(inbuf, "However, parties who have received copies, or rights, from you under\n");
+  my_strcat(inbuf, "this License will not have their licenses terminated so long as such\n");
+  my_strcat(inbuf, "parties remain in full compliance.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  5. You are not required to accept this License, since you have not\n");
+  my_strcat(inbuf, "signed it.  However, nothing else grants you permission to modify or\n");
+  my_strcat(inbuf, "distribute the Program or its derivative works.  These actions are\n");
+  my_strcat(inbuf, "prohibited by law if you do not accept this License.  Therefore, by\n");
+  my_strcat(inbuf, "modifying or distributing the Program (or any work based on the\n");
+  my_strcat(inbuf, "Program), you indicate your acceptance of this License to do so, and\n");
+  my_strcat(inbuf, "all its terms and conditions for copying, distributing or modifying\n");
+  my_strcat(inbuf, "the Program or works based on it.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  6. Each time you redistribute the Program (or any work based on the\n");
+  my_strcat(inbuf, "Program), the recipient automatically receives a license from the\n");
+  my_strcat(inbuf, "original licensor to copy, distribute or modify the Program subject to\n");
+  my_strcat(inbuf, "these terms and conditions.  You may not impose any further\n");
+  my_strcat(inbuf, "restrictions on the recipients' exercise of the rights granted herein.\n");
+  my_strcat(inbuf, "You are not responsible for enforcing compliance by third parties to\n");
+  my_strcat(inbuf, "this License.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  7. If, as a consequence of a court judgment or allegation of patent\n");
+  my_strcat(inbuf, "infringement or for any other reason (not limited to patent issues),\n");
+  my_strcat(inbuf, "conditions are imposed on you (whether by court order, agreement or\n");
+  my_strcat(inbuf, "otherwise) that contradict the conditions of this License, they do not\n");
+  my_strcat(inbuf, "excuse you from the conditions of this License.  If you cannot\n");
+  my_strcat(inbuf, "distribute so as to satisfy simultaneously your obligations under this\n");
+  my_strcat(inbuf, "License and any other pertinent obligations, then as a consequence you\n");
+  my_strcat(inbuf, "may not distribute the Program at all.  For example, if a patent\n");
+  my_strcat(inbuf, "license would not permit royalty-free redistribution of the Program by\n");
+  my_strcat(inbuf, "all those who receive copies directly or indirectly through you, then\n");
+  my_strcat(inbuf, "the only way you could satisfy both it and this License would be to\n");
+  my_strcat(inbuf, "refrain entirely from distribution of the Program.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "If any portion of this section is held invalid or unenforceable under\n");
+  my_strcat(inbuf, "any particular circumstance, the balance of the section is intended to\n");
+  my_strcat(inbuf, "apply and the section as a whole is intended to apply in other\n");
+  my_strcat(inbuf, "circumstances.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "It is not the purpose of this section to induce you to infringe any\n");
+  my_strcat(inbuf, "patents or other property right claims or to contest validity of any\n");
+  my_strcat(inbuf, "such claims; this section has the sole purpose of protecting the\n");
+  my_strcat(inbuf, "integrity of the free software distribution system, which is\n");
+  my_strcat(inbuf, "implemented by public license practices.  Many people have made\n");
+  my_strcat(inbuf, "generous contributions to the wide range of software distributed\n");
+  my_strcat(inbuf, "through that system in reliance on consistent application of that\n");
+  my_strcat(inbuf, "system; it is up to the author/donor to decide if he or she is willing\n");
+  my_strcat(inbuf, "to distribute software through any other system and a licensee cannot\n");
+  my_strcat(inbuf, "impose that choice.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "This section is intended to make thoroughly clear what is believed to\n");
+  my_strcat(inbuf, "be a consequence of the rest of this License.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  8. If the distribution and/or use of the Program is restricted in\n");
+  my_strcat(inbuf, "certain countries either by patents or by copyrighted interfaces, the\n");
+  my_strcat(inbuf, "original copyright holder who places the Program under this License\n");
+  my_strcat(inbuf, "may add an explicit geographical distribution limitation excluding\n");
+  my_strcat(inbuf, "those countries, so that distribution is permitted only in or among\n");
+  my_strcat(inbuf, "countries not thus excluded.  In such case, this License incorporates\n");
+  my_strcat(inbuf, "the limitation as if written in the body of this License.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  9. The Free Software Foundation may publish revised and/or new versions\n");
+  my_strcat(inbuf, "of the General Public License from time to time.  Such new versions will\n");
+  my_strcat(inbuf, "be similar in spirit to the present version, but may differ in detail to\n");
+  my_strcat(inbuf, "address new problems or concerns.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "Each version is given a distinguishing version number.  If the Program\n");
+  my_strcat(inbuf, "specifies a version number of this License which applies to it and any\n");
+  my_strcat(inbuf, "later version, you have the option of following the terms and conditions\n");
+  my_strcat(inbuf, "either of that version or of any later version published by the Free\n");
+  my_strcat(inbuf, "Software Foundation.  If the Program does not specify a version number of\n");
+  my_strcat(inbuf, "this License, you may choose any version ever published by the Free Software\n");
+  my_strcat(inbuf, "Foundation.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  10. If you wish to incorporate parts of the Program into other free\n");
+  my_strcat(inbuf, "programs whose distribution conditions are different, write to the author\n");
+  my_strcat(inbuf, "to ask for permission.  For software which is copyrighted by the Free\n");
+  my_strcat(inbuf, "Software Foundation, write to the Free Software Foundation; we sometimes\n");
+  my_strcat(inbuf, "make exceptions for this.  Our decision will be guided by the two goals\n");
+  my_strcat(inbuf, "of preserving the free status of all derivatives of our free software and\n");
+  my_strcat(inbuf, "of promoting the sharing and reuse of software generally.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "			    NO WARRANTY\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n");
+  my_strcat(inbuf, "FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN\n");
+  my_strcat(inbuf, "OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\n");
+  my_strcat(inbuf, "PROVIDE THE PROGRAM AS IS WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n");
+  my_strcat(inbuf, "OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n");
+  my_strcat(inbuf, "MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\n");
+  my_strcat(inbuf, "TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\n");
+  my_strcat(inbuf, "PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n");
+  my_strcat(inbuf, "REPAIR OR CORRECTION.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n");
+  my_strcat(inbuf, "WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n");
+  my_strcat(inbuf, "REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n");
+  my_strcat(inbuf, "INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n");
+  my_strcat(inbuf, "OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n");
+  my_strcat(inbuf, "TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n");
+  my_strcat(inbuf, "YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n");
+  my_strcat(inbuf, "PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n");
+  my_strcat(inbuf, "POSSIBILITY OF SUCH DAMAGES.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "		     END OF TERMS AND CONDITIONS\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "	    How to Apply These Terms to Your New Programs\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  If you develop a new program, and you want it to be of the greatest\n");
+  my_strcat(inbuf, "possible use to the public, the best way to achieve this is to make it\n");
+  my_strcat(inbuf, "free software which everyone can redistribute and change under these terms.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  To do so, attach the following notices to the program.  It is safest\n");
+  my_strcat(inbuf, "to attach them to the start of each source file to most effectively\n");
+  my_strcat(inbuf, "convey the exclusion of warranty; and each file should have at least\n");
+  my_strcat(inbuf, "the copyright line and a pointer to where the full notice is found.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    <one line to give the program's name and a brief idea of what it does.>\n");
+  my_strcat(inbuf, "    Copyright (C) <year>  <name of author>\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    This program is free software; you can redistribute it and/or modify\n");
+  my_strcat(inbuf, "    it under the terms of the GNU General Public License as published by\n");
+  my_strcat(inbuf, "    the Free Software Foundation; either version 2 of the License, or\n");
+  my_strcat(inbuf, "    (at your option) any later version.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    This program is distributed in the hope that it will be useful,\n");
+  my_strcat(inbuf, "    but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
+  my_strcat(inbuf, "    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
+  my_strcat(inbuf, "    GNU General Public License for more details.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    You should have received a copy of the GNU General Public License\n");
+  my_strcat(inbuf, "    along with this program; if not, write to the Free Software\n");
+  my_strcat(inbuf, "    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "Also add information on how to contact you by electronic and paper mail.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "If the program is interactive, make it output a short notice like this\n");
+  my_strcat(inbuf, "when it starts in an interactive mode:\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "    Gnomovision version 69, Copyright (C) year  name of author\n");
+  my_strcat(inbuf, "    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n");
+  my_strcat(inbuf, "    This is free software, and you are welcome to redistribute it\n");
+  my_strcat(inbuf, "    under certain conditions; type `show c' for details.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "The hypothetical commands `show w' and `show c' should show the appropriate\n");
+  my_strcat(inbuf, "parts of the General Public License.  Of course, the commands you use may\n");
+  my_strcat(inbuf, "be called something other than `show w' and `show c'; they could even be\n");
+  my_strcat(inbuf, "mouse-clicks or menu items--whatever suits your program.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "You should also get your employer (if you work as a programmer) or your\n");
+  my_strcat(inbuf, "school, if any, to sign a copyright disclaimer for the program, if\n");
+  my_strcat(inbuf, "necessary.  Here is a sample; alter the names:\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n");
+  my_strcat(inbuf, "  `Gnomovision' (which makes passes at compilers) written by James Hacker.\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "  <signature of Ty Coon>, 1 April 1989\n");
+  my_strcat(inbuf, "  Ty Coon, President of Vice\n");
+  my_strcat(inbuf, "\n");
+  my_strcat(inbuf, "This General Public License does not permit incorporating your program into\n");
+  my_strcat(inbuf, "proprietary programs.  If your program is a subroutine library, you may\n");
+  my_strcat(inbuf, "consider it more useful to permit linking proprietary applications with the\n");
+  my_strcat(inbuf, "library.  If this is what you want to do, use the GNU Library General\n");
+  my_strcat(inbuf, "Public License instead of this License.\n");
+
+  my_strcat(inbuf, "\n");
+}
+
+#include <stdio.h>
+#include <malloc.h>
+#include <assert.h>
+
+/* For providing services. */
+static HWord g_serviceFn ( HWord arg1, HWord arg2 )
+{
+   switch (arg1) {
+      case 0: /* EXIT */
+         exit(0);
+      case 1: /* PUTC */
+         putchar(arg2);
+         return 0;
+      case 2: /* MALLOC */
+         return (HWord)malloc(arg2);
+      case 3: /* FREE */
+         free((void*)arg2);
+         return 0;
+      default:
+         assert(0);
+   }
+}
+
+static char *bzerrorstrings[] = {
+       "OK"
+       ,"SEQUENCE_ERROR"
+       ,"PARAM_ERROR"
+       ,"MEM_ERROR"
+       ,"DATA_ERROR"
+       ,"DATA_ERROR_MAGIC"
+       ,"IO_ERROR"
+       ,"UNEXPECTED_EOF"
+       ,"OUTBUFF_FULL"
+       ,"CONFIG_ERROR"
+       ,"???"   /* for future */
+       ,"???"   /* for future */
+       ,"???"   /* for future */
+       ,"???"   /* for future */
+       ,"???"   /* for future */
+       ,"???"   /* for future */
+};
+
+// If given a cmd line arg, behave as a correctness regtest
+// (run fast and be verbose).  If not, run for a long time
+// which is what is needed for the performance suite.
+int main ( int argc, char** argv )
+{
+   int   r;
+   int   bit;
+   int   i;
+
+   int regtest;
+   assert(argc == 1 || argc == 2);
+   regtest = argc==2;
+   regtest = 1;
+   serviceFn = g_serviceFn;
+
+   set_inbuf();
+   nIn = vex_strlen(inbuf)+1;
+   vex_printf( "%d bytes read\n", nIn );
+
+   nZ = M_BLOCK;
+   r = BZ2_bzBuffToBuffCompress (
+          zbuf, &nZ, inbuf, nIn, 9, 3/*verb*/, 30 );
+
+   if (r != BZ_OK) {
+     vex_printf("initial compress failed!\n");
+     (*serviceFn)(0,0);
+   }
+   vex_printf( "%d after compression\n", nZ );
+
+   for (bit = 0; bit < nZ*8; bit += (bit < 35 ? 1 : (regtest?2377:137))) {
+      if (regtest)
+         vex_printf( "bit %d  ", bit );
+      flip_bit ( bit );
+      nOut = M_BLOCK_OUT;
+      r = BZ2_bzBuffToBuffDecompress (
+             outbuf, &nOut, zbuf, nZ, 1/*small*/, 0 );
+      if (regtest)
+         vex_printf( " %d  %s ", r, bzerrorstrings[-r] );
+
+      if (r != BZ_OK) {
+	 if (regtest)
+            vex_printf( "\n" );
+      } else {
+         if (nOut != nIn) {
+           vex_printf(  "nIn/nOut mismatch %d %d\n", nIn, nOut );
+           (*serviceFn)(0,0);
+         } else {
+           for (i = 0; i < nOut; i++)
+             if (inbuf[i] != outbuf[i]) { 
+                vex_printf(  "mismatch at %d\n", i ); 
+                (*serviceFn)(0,0); 
+           }
+           if (i == nOut) vex_printf( "really ok!\n" );
+         }
+      }
+
+      flip_bit ( bit );
+   }
+
+#if 0
+   assert (nOut == nIn);
+   for (i = 0; i < nOut; i++) {
+     if (inbuf[i] != outbuf[i]) {
+        vex_printf( "difference at %d !\n", i );
+        return 1;
+     }
+   }
+#endif
+
+   vex_printf( "all ok\n" );
+   (*serviceFn)(0,0);
+   /*NOTREACHED*/
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/hackedbz2.stderr.exp-glibc28-amd64 b/exp-ptrcheck/tests/hackedbz2.stderr.exp-glibc28-amd64
new file mode 100644
index 0000000..b3802c2
--- /dev/null
+++ b/exp-ptrcheck/tests/hackedbz2.stderr.exp-glibc28-amd64
@@ -0,0 +1,15 @@
+
+Invalid read of size 1
+   at 0x........: vex_strlen (hackedbz2.c:1006)
+   by 0x........: add_to_myprintf_buf (hackedbz2.c:1284)
+   by 0x........: vex_printf (hackedbz2.c:1155)
+   by 0x........: BZ2_compressBlock (hackedbz2.c:4039)
+   by 0x........: handle_compress (hackedbz2.c:4761)
+   by 0x........: BZ2_bzCompress (hackedbz2.c:4831)
+   by 0x........: BZ2_bzBuffToBuffCompress (hackedbz2.c:5638)
+   by 0x........: main (hackedbz2.c:6485)
+ Address 0x........ expected vs actual:
+ Expected: global array "myprintf_buf" in object with soname "NONE"
+ Actual:   unknown
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/hackedbz2.stdout.exp b/exp-ptrcheck/tests/hackedbz2.stdout.exp
new file mode 100644
index 0000000..3bfc54e
--- /dev/null
+++ b/exp-ptrcheck/tests/hackedbz2.stdout.exp
@@ -0,0 +1,70 @@
+22323 bytes read
+    block 1: crc = 0xA212ABF8, combined CRC = 0xA212ABF8, size = 22373
+    too repetitive; using fallback sorting algorithm
+      22373 in block, 13504 after MTF & 1-2 coding, 79+2 syms in use
+      pass 1: size is 17143, grp uses are 38 62 2 92 6 71 
+      pass 2: size is 6506, grp uses are 28 71 0 86 9 77 
+      pass 3: size is 6479, grp uses are 26 70 0 81 11 83 
+      pass 4: size is 6469, grp uses are 26 69 0 74 17 85 
+      bytes: mapping 19, selectors 66, code lengths 134, codes 6465
+    final combined CRC = 0xA212ABF8
+   6710 after compression
+bit 0   -5  DATA_ERROR_MAGIC 
+bit 1   -5  DATA_ERROR_MAGIC 
+bit 2   -5  DATA_ERROR_MAGIC 
+bit 3   -5  DATA_ERROR_MAGIC 
+bit 4   -5  DATA_ERROR_MAGIC 
+bit 5   -5  DATA_ERROR_MAGIC 
+bit 6   -5  DATA_ERROR_MAGIC 
+bit 7   -5  DATA_ERROR_MAGIC 
+bit 8   -5  DATA_ERROR_MAGIC 
+bit 9   -5  DATA_ERROR_MAGIC 
+bit 10   -5  DATA_ERROR_MAGIC 
+bit 11   -5  DATA_ERROR_MAGIC 
+bit 12   -5  DATA_ERROR_MAGIC 
+bit 13   -5  DATA_ERROR_MAGIC 
+bit 14   -5  DATA_ERROR_MAGIC 
+bit 15   -5  DATA_ERROR_MAGIC 
+bit 16   -5  DATA_ERROR_MAGIC 
+bit 17   -5  DATA_ERROR_MAGIC 
+bit 18   -5  DATA_ERROR_MAGIC 
+bit 19   -5  DATA_ERROR_MAGIC 
+bit 20   -5  DATA_ERROR_MAGIC 
+bit 21   -5  DATA_ERROR_MAGIC 
+bit 22   -5  DATA_ERROR_MAGIC 
+bit 23   -5  DATA_ERROR_MAGIC 
+bit 24   0  OK really ok!
+bit 25   -5  DATA_ERROR_MAGIC 
+bit 26   -5  DATA_ERROR_MAGIC 
+bit 27   0  OK really ok!
+bit 28   -5  DATA_ERROR_MAGIC 
+bit 29   -5  DATA_ERROR_MAGIC 
+bit 30   -5  DATA_ERROR_MAGIC 
+bit 31   -5  DATA_ERROR_MAGIC 
+bit 32   -4  DATA_ERROR 
+bit 33   -4  DATA_ERROR 
+bit 34   -4  DATA_ERROR 
+bit 35   -4  DATA_ERROR 
+bit 2412   -4  DATA_ERROR 
+bit 4789   -4  DATA_ERROR 
+bit 7166   -4  DATA_ERROR 
+bit 9543   -4  DATA_ERROR 
+bit 11920   -4  DATA_ERROR 
+bit 14297   -4  DATA_ERROR 
+bit 16674   -4  DATA_ERROR 
+bit 19051   -4  DATA_ERROR 
+bit 21428   -4  DATA_ERROR 
+bit 23805   -4  DATA_ERROR 
+bit 26182   -4  DATA_ERROR 
+bit 28559   -4  DATA_ERROR 
+bit 30936   -4  DATA_ERROR 
+bit 33313   -4  DATA_ERROR 
+bit 35690   -4  DATA_ERROR 
+bit 38067   -4  DATA_ERROR 
+bit 40444   -4  DATA_ERROR 
+bit 42821   -4  DATA_ERROR 
+bit 45198   -4  DATA_ERROR 
+bit 47575   -4  DATA_ERROR 
+bit 49952   -4  DATA_ERROR 
+bit 52329   -4  DATA_ERROR 
+all ok
diff --git a/exp-ptrcheck/tests/hackedbz2.vgtest b/exp-ptrcheck/tests/hackedbz2.vgtest
new file mode 100644
index 0000000..41d1772
--- /dev/null
+++ b/exp-ptrcheck/tests/hackedbz2.vgtest
@@ -0,0 +1 @@
+prog: hackedbz2
diff --git a/exp-ptrcheck/tests/hp_bounds.c b/exp-ptrcheck/tests/hp_bounds.c
new file mode 100644
index 0000000..c285e53
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_bounds.c
@@ -0,0 +1,13 @@
+#include <stdlib.h>
+
+int main(void)
+{
+   int  y;
+   int* x = malloc(sizeof(int) * 100);
+
+   y = x[95];   // ok
+   y = x[100];  // overrun
+   y = x[-1];   // underrun
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/hp_bounds.stderr.exp b/exp-ptrcheck/tests/hp_bounds.stderr.exp
new file mode 100644
index 0000000..97d073e
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_bounds.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid read of size 4
+   at 0x........: main (hp_bounds.c:9)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (hp_bounds.c:6)
+
+Invalid read of size 4
+   at 0x........: main (hp_bounds.c:10)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (hp_bounds.c:6)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/hp_bounds.vgtest b/exp-ptrcheck/tests/hp_bounds.vgtest
new file mode 100644
index 0000000..f92fc54
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_bounds.vgtest
@@ -0,0 +1 @@
+prog: hp_bounds
diff --git a/exp-ptrcheck/tests/hp_dangle.c b/exp-ptrcheck/tests/hp_dangle.c
new file mode 100644
index 0000000..271e0ef
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_dangle.c
@@ -0,0 +1,21 @@
+
+#include <stdlib.h>
+
+int* mk_dangle(void)
+{
+   int* x = malloc(400);
+   free(x);
+
+   return x;
+}
+
+int main(void)
+{
+   int  y;
+   int* x = mk_dangle();
+
+   y = x[5];
+   y = x[-1];
+   
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/hp_dangle.stderr.exp b/exp-ptrcheck/tests/hp_dangle.stderr.exp
new file mode 100644
index 0000000..bb9e46d
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_dangle.stderr.exp
@@ -0,0 +1,18 @@
+
+Invalid read of size 4
+   at 0x........: main (hp_dangle.c:17)
+ Address 0x........ is 20 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 400 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: mk_dangle (hp_dangle.c:7)
+   by 0x........: main (hp_dangle.c:15)
+
+Doubly-invalid read of size 4
+   at 0x........: main (hp_dangle.c:18)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ once-legitimate range, a block of size 400 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: mk_dangle (hp_dangle.c:7)
+   by 0x........: main (hp_dangle.c:15)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/hp_dangle.vgtest b/exp-ptrcheck/tests/hp_dangle.vgtest
new file mode 100644
index 0000000..82785c9
--- /dev/null
+++ b/exp-ptrcheck/tests/hp_dangle.vgtest
@@ -0,0 +1 @@
+prog: hp_dangle
diff --git a/exp-ptrcheck/tests/idiv.c b/exp-ptrcheck/tests/idiv.c
new file mode 100644
index 0000000..f4ecb9c
--- /dev/null
+++ b/exp-ptrcheck/tests/idiv.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // ADD =========================================================
+   g(/, n,  n2, n);  // det, det
+
+   g(/, n,  p,  e);  // detected bad idiv;  det, det
+
+   g(/, p,  n2, n);  // ok, det
+
+   g(/, p,  p,  e);  // detected bad idiv;  det, det
+
+   g(/, n,  un, n);  // undet, undet
+   g(/, n,  up, n);  // undetected bad idiv;  ok, undet
+
+   g(/, un, n2, n);  // undet, undet
+   g(/, up, n2, n);  // ok, undet
+
+   g(/, un, un, n);  // undet, undet
+   g(/, un, up, n);  // undetected bad idiv;  undet, undet
+   g(/, up, un, n);  // undet, undet
+   g(/, up, up, n);  // undetected bad idiv;  undet, undet
+
+   g(/, un, p,  n);  // detected bad idiv;  undet, undet
+   g(/, up, p,  n);  // detected bad idiv;  undet, undet
+
+   g(/, p,  un, n);  // undet, undet
+   g(/, p,  up, n);  // undetected bad idiv;  undet, undet
+  
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/idiv.stderr.exp b/exp-ptrcheck/tests/idiv.stderr.exp
new file mode 100644
index 0000000..794ecea
--- /dev/null
+++ b/exp-ptrcheck/tests/idiv.stderr.exp
@@ -0,0 +1,269 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (idiv.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+Invalid IDIV
+   at 0x........: main (idiv.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg not a pointer
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+about to do 16 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+Invalid IDIV
+   at 0x........: main (idiv.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 23 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 23 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 25 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 25 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 26 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:26)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 26 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:26)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 28 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 28 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 29 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:29)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 29 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:29)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 30 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:30)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 30 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:30)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 31 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 31 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+Invalid IDIV
+   at 0x........: main (idiv.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg may be a pointer
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+about to do 33 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 33 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+Invalid IDIV
+   at 0x........: main (idiv.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First arg may be a pointer
+Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+about to do 34 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 34 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 36 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:36)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 36 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:36)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 37 [0]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:37)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 37 [-1]
+
+Invalid read of size 4
+   at 0x........: main (idiv.c:37)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 36 errors from 36 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/idiv.vgtest-disabled b/exp-ptrcheck/tests/idiv.vgtest-disabled
new file mode 100644
index 0000000..e8aa9bb
--- /dev/null
+++ b/exp-ptrcheck/tests/idiv.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: idiv
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/imul.c b/exp-ptrcheck/tests/imul.c
new file mode 100644
index 0000000..877c00a
--- /dev/null
+++ b/exp-ptrcheck/tests/imul.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // ADD =========================================================
+   g(*, n,  n,  n);  // det, det
+
+   g(*, n,  p,  n);  // det, det
+
+   g(*, p,  n,  n);  // ok, det
+
+   g(*, p,  p,  e);  // detected bad mul;  det, det
+
+   g(*, n,  un, n);  // det, det
+   g(*, n,  up, n);  // ok, det
+
+   g(*, un, n,  n);  // det, det
+   g(*, up, n,  n);  // ok, det
+
+   g(*, un, un, n);  // det, det
+   g(*, un, up, n);  // det, det
+   g(*, up, un, n);  // det, det
+   g(*, up, up, n);  // undetected bad imul; det, det
+
+   g(*, un, p,  n);  // det, det
+   g(*, up, p,  n);  // undetected bad imul; det, det
+
+   g(*, p,  un, n);  // det, det
+   g(*, p,  up, n);  // undetected bad imul; det, det
+  
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/imul.stderr.exp b/exp-ptrcheck/tests/imul.stderr.exp
new file mode 100644
index 0000000..96f6e7d
--- /dev/null
+++ b/exp-ptrcheck/tests/imul.stderr.exp
@@ -0,0 +1,236 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (imul.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 16 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+Invalid IMUL
+   at 0x........: main (imul.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 23 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 23 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:23)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 25 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 25 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:25)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 26 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:26)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 26 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:26)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 28 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 28 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 29 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:29)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 29 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:29)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 30 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:30)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 30 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:30)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 31 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 31 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 33 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 33 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 34 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 34 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 36 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:36)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 36 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:36)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 37 [0]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:37)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 37 [-1]
+
+Invalid read of size 4
+   at 0x........: main (imul.c:37)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 33 errors from 33 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/imul.vgtest-disabled b/exp-ptrcheck/tests/imul.vgtest-disabled
new file mode 100644
index 0000000..c220364
--- /dev/null
+++ b/exp-ptrcheck/tests/imul.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: imul
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/justify.c b/exp-ptrcheck/tests/justify.c
new file mode 100644
index 0000000..0cc5295
--- /dev/null
+++ b/exp-ptrcheck/tests/justify.c
@@ -0,0 +1,23 @@
+#include <stdlib.h>
+#include <assert.h>
+
+// This is an example of an error found by Annelid, but not found by
+// Memcheck -- because the wild read goes past the redzones of the pointer's
+// block.
+//
+// Nb: for Memcheck to not spot this, relies on it putting the 2nd block in
+// memory after the 1st block.
+
+int main ( void )
+{
+   char c;
+   char *c0, *c1;
+
+   c0 = malloc(10000);
+   c1 = malloc(10000);
+   assert(c0 && c1);
+
+   c = c0[15000];
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/justify.stderr.exp b/exp-ptrcheck/tests/justify.stderr.exp
new file mode 100644
index 0000000..b1846d1
--- /dev/null
+++ b/exp-ptrcheck/tests/justify.stderr.exp
@@ -0,0 +1,9 @@
+
+Invalid read of size 1
+   at 0x........: main (justify.c:20)
+ Address 0x........ is 5000 bytes after the accessing pointer's
+ legitimate range, a block of size 10000 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (justify.c:16)
+
+ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/justify.vgtest b/exp-ptrcheck/tests/justify.vgtest
new file mode 100644
index 0000000..aef13f6
--- /dev/null
+++ b/exp-ptrcheck/tests/justify.vgtest
@@ -0,0 +1 @@
+prog: justify
diff --git a/exp-ptrcheck/tests/mm.c b/exp-ptrcheck/tests/mm.c
new file mode 100644
index 0000000..00d1407
--- /dev/null
+++ b/exp-ptrcheck/tests/mm.c
@@ -0,0 +1,59 @@
+#include <sys/mman.h>
+#include <unistd.h>
+#include "arith_include1.c"
+
+// For some reason, the stack frame below __GI_write is disappearing.
+// Therefore, if I don't want the write errors to be merged, I have to
+// ensure they have a different stack trace.  I do this by using this
+// function.  Weird.
+void mywrite(char* buf, int len)
+{
+   write(-1, buf, len);
+}
+
+int main(void)
+{
+   struct sigaction sigsegv;
+   
+   char c;
+   
+   // This fails due to a bad fd (at one point I was not handling failing
+   // mmap() calls, and would have got a seg fault).
+   char* res1 = mmap(0, 0, PROT_READ, MAP_PRIVATE, -1, 0 );
+
+   // This succeeds but is meaningless.  Important thing is that the size is
+   // zero, so Annelid should not subtract one from the size when doing any
+   // range calculations.  (It did at one point, giving 0xffffffff, which
+   // screwed everything up.)
+   char* res2 = mmap(0, 0, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 );
+
+   // This succeeds and is useful.
+   char* res3 = mmap(0, getpagesize(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+   assert(MAP_FAILED == res1);
+   assert(NULL       == res2);
+   assert(MAP_FAILED != res3 && NULL != res3);
+
+   // Install SEGV handler 
+   sigsegv.sa_handler = SEGV_handler;
+   sigsegv.sa_flags   = 0;
+   assert( 0 == sigemptyset( &sigsegv.sa_mask ) );
+   assert( 0 == sigaction(SIGSEGV, &sigsegv, NULL) );
+
+   #define TTT(i) \
+      if (__builtin_setjmp(TTT_jmpbuf) == 0) { c = res3[i]; }
+
+   TTT(0);
+   TTT(-1);
+   mywrite(res3,   5);
+   mywrite(res3-1, 5);
+
+   assert( 0 == munmap(res3, getpagesize()) );
+
+   TTT(0);
+   TTT(-1);
+   mywrite(res3,   5);
+   mywrite(res3-1, 5);
+   
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/mm.stderr.exp b/exp-ptrcheck/tests/mm.stderr.exp
new file mode 100644
index 0000000..57657d6
--- /dev/null
+++ b/exp-ptrcheck/tests/mm.stderr.exp
@@ -0,0 +1,64 @@
+
+Invalid read of size 1
+   at 0x........: main (mm.c:47)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, the 4096-byte block mmap'd
+   at 0x........: __mmap (in /...libc...)
+Warning: invalid file descriptor -1 in syscall write()
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (mm.c:49)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte is not within a known block
+Last byte (0x........) is 3 bytes within a 4096-byte block mmap'd
+   at 0x........: __mmap (in /...libc...)
+
+Invalid read of size 1
+   at 0x........: main (mm.c:53)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, the 4096-byte block munmap'd
+   at 0x........: __munmap (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Doubly-invalid read of size 1
+   at 0x........: main (mm.c:54)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ once-legitimate range, the 4096-byte block munmap'd
+   at 0x........: __munmap (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) contains unaddressable byte(s)
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (mm.c:55)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 0 bytes within a 4096-byte block munmap'd
+   at 0x........: __munmap (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (mm.c:56)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte is not within a known block
+Last byte (0x........) is 3 bytes within a 4096-byte block munmap'd
+   at 0x........: __munmap (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/mm.vgtest-disabled b/exp-ptrcheck/tests/mm.vgtest-disabled
new file mode 100644
index 0000000..ab3d50e
--- /dev/null
+++ b/exp-ptrcheck/tests/mm.vgtest-disabled
@@ -0,0 +1 @@
+prog: mm
diff --git a/exp-ptrcheck/tests/neg.c b/exp-ptrcheck/tests/neg.c
new file mode 100644
index 0000000..5ea7734
--- /dev/null
+++ b/exp-ptrcheck/tests/neg.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+   
+   // NEG =========================================================
+   ui(-, p,  n);     // bad NEG; ok, det
+
+   ui(-, up, n);     // det, det
+
+   ui(-, un, n);     // det, det
+
+   ui(-, n,  n);     // det, det
+
+   ui(-, nn, n);     // det, det
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/neg.stderr.exp b/exp-ptrcheck/tests/neg.stderr.exp
new file mode 100644
index 0000000..37299cf
--- /dev/null
+++ b/exp-ptrcheck/tests/neg.stderr.exp
@@ -0,0 +1,52 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (neg.c:14)
+ Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:14)
+ Address 0x........ is not derived from any known block
+about to do 16 [0]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:16)
+ Address 0x........ is not derived from any known block
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:16)
+ Address 0x........ is not derived from any known block
+about to do 18 [0]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:18)
+ Address 0x........ is not derived from any known block
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:18)
+ Address 0x........ is not derived from any known block
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:20)
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:20)
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:22)
+ Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (neg.c:22)
+ Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/neg.vgtest-disabled b/exp-ptrcheck/tests/neg.vgtest-disabled
new file mode 100644
index 0000000..c546a43
--- /dev/null
+++ b/exp-ptrcheck/tests/neg.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: neg
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/not.c b/exp-ptrcheck/tests/not.c
new file mode 100644
index 0000000..c5c08d1
--- /dev/null
+++ b/exp-ptrcheck/tests/not.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+   
+   // NOT =========================================================
+   ui(~, p,  n);      // det, det
+
+   ui(~, up, n);      // det, det
+
+   ui(~, un, n);      // det, det
+
+   ui(~, n,  n);      // det, det
+
+   ui(~, nn, n);      // det, det
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/not.stderr.exp b/exp-ptrcheck/tests/not.stderr.exp
new file mode 100644
index 0000000..40d1435
--- /dev/null
+++ b/exp-ptrcheck/tests/not.stderr.exp
@@ -0,0 +1,72 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (not.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (not.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 16 [0]
+
+Invalid read of size 4
+   at 0x........: main (not.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (not.c:16)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [0]
+
+Invalid read of size 4
+   at 0x........: main (not.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (not.c:18)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (not.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (not.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [0]
+
+Invalid read of size 4
+   at 0x........: main (not.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+about to do 22 [-1]
+
+Invalid read of size 4
+   at 0x........: main (not.c:22)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is not derived from any known block
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/not.vgtest-disabled b/exp-ptrcheck/tests/not.vgtest-disabled
new file mode 100644
index 0000000..266c349
--- /dev/null
+++ b/exp-ptrcheck/tests/not.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: not
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/or.c b/exp-ptrcheck/tests/or.c
new file mode 100644
index 0000000..6fd2128
--- /dev/null
+++ b/exp-ptrcheck/tests/or.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // OR ==========================================================
+   g(|, n,  n,  n);  // det, det
+
+   g(|, n,  p,  p);  // ok, det
+
+   g(|, p,  n,  p);  // ok, det
+
+   g(|, p,  p,  e);  // detected bad OR;  ok, det
+   g(|, p,  p2, e);  // detected bad OR;  det, det
+
+   g(|, n,  un, u);  // undet, undet
+   g(|, n,  up, u);  // ok, undet
+
+   g(|, un, n,  u);  // undet, undet
+   g(|, up, n,  u);  // ok, undet
+
+   g(|, un, un, u);  // undet, undet
+   g(|, un, up, u);  // ok, undet
+   g(|, up, un, u);  // ok, undet
+   g(|, up, up, u);  // undetected bad OR; ok, undet
+   g(|, up, up2,u);  // undetected bad OR; undet, undet 
+
+   g(|, un, p,  u);  // ok, undet
+   g(|, up, p,  u);  // undetected bad OR; undet, undet
+
+   g(|, p,  un, u);  // ok, undet
+   g(|, p,  up, u);  // undetected bad OR; undet, undet
+  
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/or.stderr.exp b/exp-ptrcheck/tests/or.stderr.exp
new file mode 100644
index 0000000..c5afcd3
--- /dev/null
+++ b/exp-ptrcheck/tests/or.stderr.exp
@@ -0,0 +1,91 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (or.c:14)
+ Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (or.c:14)
+ Address 0x........ is not derived from any known block
+about to do 16 [0]
+about to do 16 [-1]
+
+Invalid read of size 4
+   at 0x........: main (or.c:16)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 18 [0]
+about to do 18 [-1]
+
+Invalid read of size 4
+   at 0x........: main (or.c:18)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+
+Invalid OR
+   at 0x........: main (or.c:20)
+Both args derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (or.c:20)
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (or.c:20)
+ Address 0x........ is not derived from any known block
+
+Invalid arguments to Or32/Or64
+   at 0x........: main (or.c:21)
+ First arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+ Second arg derived from address 0x........ of 40-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 21 [0]
+
+Invalid read of size 4
+   at 0x........: main (or.c:21)
+ Address 0x........ is not derived from any known block
+about to do 21 [-1]
+
+Invalid read of size 4
+   at 0x........: main (or.c:21)
+ Address 0x........ is not derived from any known block
+about to do 23 [0]
+about to do 23 [-1]
+about to do 24 [0]
+about to do 24 [-1]
+about to do 26 [0]
+about to do 26 [-1]
+about to do 27 [0]
+about to do 27 [-1]
+about to do 29 [0]
+about to do 29 [-1]
+about to do 30 [0]
+about to do 30 [-1]
+about to do 31 [0]
+about to do 31 [-1]
+about to do 32 [0]
+about to do 32 [-1]
+about to do 33 [0]
+about to do 33 [-1]
+about to do 35 [0]
+about to do 35 [-1]
+about to do 36 [0]
+about to do 36 [-1]
+about to do 38 [0]
+about to do 38 [-1]
+about to do 39 [0]
+about to do 39 [-1]
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/or.vgtest-disabled b/exp-ptrcheck/tests/or.vgtest-disabled
new file mode 100644
index 0000000..9a714a1
--- /dev/null
+++ b/exp-ptrcheck/tests/or.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: or
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/partial.c b/exp-ptrcheck/tests/partial.c
new file mode 100644
index 0000000..7bdc356
--- /dev/null
+++ b/exp-ptrcheck/tests/partial.c
@@ -0,0 +1,52 @@
+
+#include <stdlib.h>
+
+int main ( void )
+{
+   int* x3 = malloc(3);    float         f,   *f3   = malloc(3);
+   int* x4 = malloc(4);    double        d,   *d7   = malloc(7);
+   int* x5 = malloc(5);    long long int lli, *lli7 = malloc(7);
+   int* x6 = malloc(6);    char          c,   *c0   = malloc(0);
+   int* x7 = malloc(7);    short int     s,   *s1   = malloc(1);
+   int  x;
+   int* y4 = malloc(4);
+   int* y5 = malloc(5);
+   int* y6 = malloc(6);
+   int* y7 = malloc(7);
+
+   #define ADDB(ptr, n)  ((int*)(((unsigned long)(ptr)) + (n)))
+
+   // All these overrun by a single byte;  the reads are happening at
+   // different alignments.
+   x = * ADDB(x3,0);    // ok if --partial-loads-ok=yes
+   x = * ADDB(x4,1);
+   x = * ADDB(x5,2);
+   x = * ADDB(x6,3);
+   x = * ADDB(x7,4);    // ok if --partial-loads-ok=yes
+
+   // These are fine
+   x = * ADDB(y4,0);
+   x = * ADDB(y5,1);
+   x = * ADDB(y6,2);
+   x = * ADDB(y7,3);
+
+   // These are all bad, at different points along
+   x = * ADDB(x3,-1);   // before
+   x = * ADDB(x3, 0);   // inside      // ok if --partial-loads-ok=yes ...
+   x = * ADDB(x3, 1);   // inside      // ... but only on 32 bit platforms
+   x = * ADDB(x3, 2);   // inside      // ... ditto
+   x = * ADDB(x3, 3);   // after
+
+   // These are all bad
+   f   = * f3;    // ok if --partial-loads-ok=yes  // ... ditto
+   d   = * d7;
+   lli = * lli7;  // ok if --partial-loads-ok=yes  see XXX below
+   c   = * c0;
+   s   = * s1;
+
+   return 0;
+}
+
+/* Note re XXX, this gives different behaviour on 32 and 64 bit
+platforms, because on 64-bit it's one load whereas as on 32 bit
+platforms it's necessarily 2 32-bit loads, and the first one is OK. */
diff --git a/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..383f05e
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-amd64
@@ -0,0 +1,107 @@
+
+Invalid read of size 4
+   at 0x........: main (partial.c:21)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:22)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:23)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 5 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:24)
+ Address 0x........ is 3 bytes inside the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:25)
+ Address 0x........ is 4 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:34)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:35)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:36)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:37)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:38)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:41)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 8
+   at 0x........: main (partial.c:42)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 8
+   at 0x........: main (partial.c:43)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 1
+   at 0x........: main (partial.c:44)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 0 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 2
+   at 0x........: main (partial.c:45)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 1 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+ERROR SUMMARY: 15 errors from 15 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..0d1cdd2
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_bad.stderr.exp-glibc25-x86
@@ -0,0 +1,107 @@
+
+Invalid read of size 4
+   at 0x........: main (partial.c:21)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:22)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:23)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 5 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:24)
+ Address 0x........ is 3 bytes inside the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:25)
+ Address 0x........ is 4 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:34)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:35)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:36)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:37)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:38)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:41)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 8
+   at 0x........: main (partial.c:42)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:43)
+ Address 0x........ is 4 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 1
+   at 0x........: main (partial.c:44)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 0 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 2
+   at 0x........: main (partial.c:45)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 1 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+ERROR SUMMARY: 15 errors from 15 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/partial_bad.vgtest b/exp-ptrcheck/tests/partial_bad.vgtest
new file mode 100644
index 0000000..86e13e8
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_bad.vgtest
@@ -0,0 +1,2 @@
+prog: partial
+vgopts: --partial-loads-ok=no
diff --git a/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..dbc1035
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-amd64
@@ -0,0 +1,93 @@
+
+Invalid read of size 4
+   at 0x........: main (partial.c:21)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:22)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:23)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 5 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:24)
+ Address 0x........ is 3 bytes inside the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:25)
+ Address 0x........ is 4 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:34)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:35)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:36)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:37)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:38)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:41)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 1
+   at 0x........: main (partial.c:44)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 0 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 2
+   at 0x........: main (partial.c:45)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 1 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+ERROR SUMMARY: 13 errors from 13 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..6d14f65
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_good.stderr.exp-glibc25-x86
@@ -0,0 +1,72 @@
+
+Invalid read of size 4
+   at 0x........: main (partial.c:22)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:23)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 5 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:8)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:24)
+ Address 0x........ is 3 bytes inside the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:34)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:36)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:37)
+ Address 0x........ is 2 bytes inside the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 4
+   at 0x........: main (partial.c:38)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 3 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:6)
+
+Invalid read of size 8
+   at 0x........: main (partial.c:42)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 7 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:7)
+
+Invalid read of size 1
+   at 0x........: main (partial.c:44)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 0 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:9)
+
+Invalid read of size 2
+   at 0x........: main (partial.c:45)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ legitimate range, a block of size 1 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (partial.c:10)
+
+ERROR SUMMARY: 10 errors from 10 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/partial_good.vgtest b/exp-ptrcheck/tests/partial_good.vgtest
new file mode 100644
index 0000000..29fd892
--- /dev/null
+++ b/exp-ptrcheck/tests/partial_good.vgtest
@@ -0,0 +1,2 @@
+prog: partial
+vgopts: --partial-loads-ok=yes
diff --git a/exp-ptrcheck/tests/pth_create.c b/exp-ptrcheck/tests/pth_create.c
new file mode 100644
index 0000000..2c2d5bb
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_create.c
@@ -0,0 +1,22 @@
+
+#include <pthread.h>
+#include <stdlib.h>
+
+
+
+// This demonstrates an error for a pre_mem_{read,write} event that comes
+// from the core, rather than a syscall (i.e. part == Vg_CorePart instead of
+// part == Vg_CoreSyscall).
+
+
+int main(void)
+{
+   pthread_key_t* key  = malloc(sizeof(pthread_key_t));
+   pthread_key_t* key2 = malloc(sizeof(pthread_key_t));
+
+   pthread_key_create ( (pthread_key_t*)((long)key + 1), NULL );
+   free(key2);
+   pthread_key_create (                        key2    , NULL );
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/pth_create.stderr.exp b/exp-ptrcheck/tests/pth_create.stderr.exp
new file mode 100644
index 0000000..005e153
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_create.stderr.exp
@@ -0,0 +1,18 @@
+
+Invalid write of size 4
+   at 0x........: pthread_key_create (in /...libpthread...)
+   by 0x........: main (pth_create.c:17)
+ Address 0x........ is 1 bytes inside the accessing pointer's
+ legitimate range, a block of size 4 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (pth_create.c:14)
+
+Invalid write of size 4
+   at 0x........: pthread_key_create (in /...libpthread...)
+   by 0x........: main (pth_create.c:19)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 4 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (pth_create.c:18)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/pth_create.vgtest b/exp-ptrcheck/tests/pth_create.vgtest
new file mode 100644
index 0000000..c98abc6
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_create.vgtest
@@ -0,0 +1 @@
+prog: pth_create
diff --git a/exp-ptrcheck/tests/pth_specific.c b/exp-ptrcheck/tests/pth_specific.c
new file mode 100644
index 0000000..d679bb7
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_specific.c
@@ -0,0 +1,34 @@
+#include <pthread.h>
+#include <assert.h>
+#include <stdlib.h>
+
+// This shows that putting a segment pointer into a thread-specific data
+// area and then getting it out again doesn't lose info -- even though the
+// key allocation/getting is done on the real CPU where the tool can't see,
+// the get/set of the info is done using that key on the simulated CPU where
+// it can see, so everything works out fine.
+
+int main(void)
+{
+   pthread_key_t key;
+   char *x, *z;
+   char  y;
+
+   x = malloc(100);
+
+   y = x[-1];     // error
+   x[1] = 'z';
+
+   assert( 0 == pthread_key_create ( &key, NULL ) );
+   assert( 0 == pthread_setspecific(  key, x ) );
+   z = (char*)pthread_getspecific( key );
+   assert( 0 != z );
+
+   y = z[-1];     // error
+
+   // ensure the key went in and out correctly
+   assert(z == x);
+   assert(z[1] == 'z');
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/pth_specific.stderr.exp b/exp-ptrcheck/tests/pth_specific.stderr.exp
new file mode 100644
index 0000000..b35c9db
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_specific.stderr.exp
@@ -0,0 +1,16 @@
+
+Invalid read of size 1
+   at 0x........: main (pth_specific.c:19)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 100 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (pth_specific.c:17)
+
+Invalid read of size 1
+   at 0x........: main (pth_specific.c:27)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 100 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (pth_specific.c:17)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/pth_specific.vgtest b/exp-ptrcheck/tests/pth_specific.vgtest
new file mode 100644
index 0000000..f2a2a34
--- /dev/null
+++ b/exp-ptrcheck/tests/pth_specific.vgtest
@@ -0,0 +1 @@
+prog: pth_specific
diff --git a/exp-ptrcheck/tests/realloc.c b/exp-ptrcheck/tests/realloc.c
new file mode 100644
index 0000000..f588d53
--- /dev/null
+++ b/exp-ptrcheck/tests/realloc.c
@@ -0,0 +1,46 @@
+
+#include <stdlib.h>
+
+int main(void)
+{
+   int i;
+   int* y;
+   int** x  = malloc(sizeof(int*) * 100);
+   int* x2 = malloc(sizeof(int) * 100);
+   void* sink;
+   x[0]  = x2;  // this is to check the pointerness is copied across ok
+   x[49] = x2;  // this is to check the pointerness is copied across ok
+   
+   i = *x[0];
+   i = *x[49];
+   
+   x = realloc(x, sizeof(int*)*50);     // smaller
+   y = x[0];   // ok
+   y = x[49];  // ok
+   y = x[-1];  // bad
+   y = x[50];  // bad
+   i = *x[0];  // ok
+   i = *x[49]; // ok
+
+   x = realloc(x, sizeof(int*)*50);     // same size
+   y = x[0];   // ok
+   y = x[49];  // ok
+   y = x[-1];  // bad
+   y = x[50];  // bad
+   i = *x[0];  // ok
+   i = *x[49]; // ok
+
+   x = realloc(x, sizeof(int*)*100);     // bigger
+   y = x[0];   // ok
+   y = x[49];  // ok
+   y = x[50];  // ok
+   y = x[99];  // ok
+   y = x[-1];  // bad
+   y = x[100]; // bad
+   i = *x[0];  // ok
+   i = *x[49]; // ok
+
+   sink = realloc((void*)0x99, 10);    // fails
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..1916f1d
--- /dev/null
+++ b/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-amd64
@@ -0,0 +1,44 @@
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:20)
+ Address 0x........ is 8 bytes before the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:17)
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:21)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:17)
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:28)
+ Address 0x........ is 8 bytes before the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:25)
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:29)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:25)
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:38)
+ Address 0x........ is 8 bytes before the accessing pointer's
+ legitimate range, a block of size 800 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:33)
+
+Invalid read of size 8
+   at 0x........: main (realloc.c:39)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 800 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:33)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..5bf3951
--- /dev/null
+++ b/exp-ptrcheck/tests/realloc.stderr.exp-glibc25-x86
@@ -0,0 +1,44 @@
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:20)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 200 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:17)
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:21)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 200 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:17)
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:28)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 200 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:25)
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:29)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 200 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:25)
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:38)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:33)
+
+Invalid read of size 4
+   at 0x........: main (realloc.c:39)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 400 alloc'd
+   at 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (realloc.c:33)
+
+ERROR SUMMARY: 6 errors from 6 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/realloc.vgtest b/exp-ptrcheck/tests/realloc.vgtest
new file mode 100644
index 0000000..0b9a00c
--- /dev/null
+++ b/exp-ptrcheck/tests/realloc.vgtest
@@ -0,0 +1 @@
+prog: realloc
diff --git a/exp-ptrcheck/tests/sh_script b/exp-ptrcheck/tests/sh_script
new file mode 100755
index 0000000..ca2b0b8
--- /dev/null
+++ b/exp-ptrcheck/tests/sh_script
@@ -0,0 +1,4 @@
+#! /bin/bash
+if [ $# != 0 ] ; then
+    echo "too many args"
+fi
diff --git a/exp-ptrcheck/tests/sh_script.stderr.exp b/exp-ptrcheck/tests/sh_script.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/sh_script.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/sh_script.vgtest-disabled b/exp-ptrcheck/tests/sh_script.vgtest-disabled
new file mode 100644
index 0000000..5942de2
--- /dev/null
+++ b/exp-ptrcheck/tests/sh_script.vgtest-disabled
@@ -0,0 +1 @@
+prog: sh_script
diff --git a/exp-ptrcheck/tests/stackerr.c b/exp-ptrcheck/tests/stackerr.c
new file mode 100644
index 0000000..9d6e35c
--- /dev/null
+++ b/exp-ptrcheck/tests/stackerr.c
@@ -0,0 +1,52 @@
+
+/* Check basic stack overflow detection.
+
+   It's difficult to get consistent behaviour across all platforms.
+   For example, x86 w/ gcc-4.3.1 gives
+
+     Expected: stack array "a" in frame 2 back from here
+     Actual:   stack array "beforea" in frame 2 back from here
+
+   whereas amd64 w/ gcc-4.3.1 gives
+
+     Expected: stack array "a" in frame 2 back from here
+     Actual:   unknown
+
+   This happens because on x86 the arrays are placed on the
+   stack without holes in between, but not so for amd64.  I don't
+   know why.
+*/
+
+
+#include <stdio.h>
+
+__attribute__((noinline)) void foo ( long* sa, int n )
+{
+  int i;
+  for (i = 0; i < n; i++)
+    sa[i] = 0;
+}
+
+__attribute__((noinline)) void bar ( long* sa, int n )
+{
+   foo(sa, n);
+}
+
+int main ( void )
+{
+  int i;
+  long beforea[3];
+  long a[7];
+  long aftera[3];
+  bar(a, 7+1);     /* generates error */
+  bar(a, 7+0);     /* generates no error */
+  for (i = 0; i < 7+1; i++) {
+     a[i] = 0;
+  }
+  char beforebuf[8];
+  char buf[8];
+  char afterbuf[8];
+  sprintf(buf, "%d", 123456789);
+  return 1 & ((a[4] + beforea[1] + aftera[1] + beforebuf[1] 
+                    + buf[2] + afterbuf[3]) / 100000) ;
+}
diff --git a/exp-ptrcheck/tests/stackerr.stderr.exp-glibc27-x86 b/exp-ptrcheck/tests/stackerr.stderr.exp-glibc27-x86
new file mode 100644
index 0000000..b86eb10
--- /dev/null
+++ b/exp-ptrcheck/tests/stackerr.stderr.exp-glibc27-x86
@@ -0,0 +1,26 @@
+
+Invalid write of size 4
+   at 0x........: foo (stackerr.c:27)
+   by 0x........: bar (stackerr.c:32)
+   by 0x........: main (stackerr.c:41)
+ Address 0x........ expected vs actual:
+ Expected: stack array "a" in frame 2 back from here
+ Actual:   stack array "beforea" in frame 2 back from here
+
+Invalid write of size 4
+   at 0x........: main (stackerr.c:44)
+ Address 0x........ expected vs actual:
+ Expected: stack array "a" in this frame
+ Actual:   stack array "beforea" in this frame
+
+Invalid write of size 1
+   at 0x........: _IO_default_xsputn (in /...libc...)
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: main (stackerr.c:49)
+ Address 0x........ expected vs actual:
+ Expected: stack array "buf" in frame 4 back from here
+ Actual:   stack array "beforebuf" in frame 4 back from here
+
+ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/stackerr.stderr.exp-glibc28-amd64 b/exp-ptrcheck/tests/stackerr.stderr.exp-glibc28-amd64
new file mode 100644
index 0000000..970d132
--- /dev/null
+++ b/exp-ptrcheck/tests/stackerr.stderr.exp-glibc28-amd64
@@ -0,0 +1,26 @@
+
+Invalid write of size 8
+   at 0x........: foo (stackerr.c:27)
+   by 0x........: bar (stackerr.c:32)
+   by 0x........: main (stackerr.c:41)
+ Address 0x........ expected vs actual:
+ Expected: stack array "a" in frame 2 back from here
+ Actual:   unknown
+
+Invalid write of size 8
+   at 0x........: main (stackerr.c:44)
+ Address 0x........ expected vs actual:
+ Expected: stack array "a" in this frame
+ Actual:   unknown
+
+Invalid write of size 1
+   at 0x........: _IO_default_xsputn (in /...libc...)
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: ...
+   by 0x........: main (stackerr.c:49)
+ Address 0x........ expected vs actual:
+ Expected: stack array "buf" in frame 4 back from here
+ Actual:   unknown
+
+ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/stackerr.stdout.exp b/exp-ptrcheck/tests/stackerr.stdout.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/exp-ptrcheck/tests/stackerr.stdout.exp
diff --git a/exp-ptrcheck/tests/stackerr.vgtest b/exp-ptrcheck/tests/stackerr.vgtest
new file mode 100644
index 0000000..b81660b
--- /dev/null
+++ b/exp-ptrcheck/tests/stackerr.vgtest
@@ -0,0 +1 @@
+prog: stackerr
diff --git a/exp-ptrcheck/tests/strcpy.c b/exp-ptrcheck/tests/strcpy.c
new file mode 100644
index 0000000..8352297
--- /dev/null
+++ b/exp-ptrcheck/tests/strcpy.c
@@ -0,0 +1,44 @@
+#include <stdlib.h>
+#include <string.h>
+
+// This shows the case whereby subtraction between two pointers from
+// different segments can be used legitimately.
+
+// dest: stack, src: heap
+char* my_strcpy (char* dest, const char* src)
+{
+   char c, *s = (char *) src;
+   long off = dest - s;
+   off = off - 1;
+   do {
+      c = *s++;
+      s[off] = c;          // s + off == dest
+   } while (c != '\0');
+   return dest;
+}
+
+int main(void)
+{
+   char* h  = "hello, world";
+   char* p1 = strdup(h);
+   char* p2 = strdup(h);
+   char  u1[13];
+   char  u2[13];
+
+   // All these are legit
+   p1[p2-p1] = 0;    // p-p   (must be BADSEG'd) // ea is p2[0]
+   u1[p2-u1] = 0;    // p-?
+   p1[u2-p1] = 0;    // ?-p   (must be BADSEG'd)
+   u1[u2-u1] = 0;    // ?-?
+
+   // All these are a 1-byte underrun
+   p1[p2-p1-1] = 0;  // p-p   (must be BADSEG'd) // ea is p2[-1]
+   u1[p2-u1-1] = 0;  // p-?   (undet) 
+   p1[u2-p1-1] = 0;  // ?-p   (must be BADSEG'd)
+   u1[u2-u1-1] = 0;  // ?-?   (undet)
+
+   my_strcpy(u1, p1);
+   my_strcpy(u2, u1);
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/strcpy.stderr.exp b/exp-ptrcheck/tests/strcpy.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/strcpy.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/strcpy.vgtest b/exp-ptrcheck/tests/strcpy.vgtest
new file mode 100644
index 0000000..809bbf6
--- /dev/null
+++ b/exp-ptrcheck/tests/strcpy.vgtest
@@ -0,0 +1 @@
+prog: strcpy
diff --git a/exp-ptrcheck/tests/strlen.c b/exp-ptrcheck/tests/strlen.c
new file mode 100644
index 0000000..92b5c14
--- /dev/null
+++ b/exp-ptrcheck/tests/strlen.c
@@ -0,0 +1,33 @@
+
+#include <stdlib.h>
+#include <string.h>
+
+// glibc's versions of functions like strlen() do things word-wise instead
+// of byte-wise, which means they can overrun the end of strings, etc.
+// Naughty, but must be safe, I guess;  Ptrcheck copes with this in the same
+// way Memcheck does, letting it happen unless the --partial-loads-ok=no
+// option is used.
+
+int main(void)
+{
+   char* h  = "hello, world";
+   char* p = strdup(h);
+   char  u[20];
+   char* c;
+   int   len;
+
+   len = strlen(p);
+
+   c = strchr (p, 'l'); 
+   c = strchr (p, 'x'); 
+
+   c = strrchr(p, 'l'); 
+   c = strrchr(p, 'x'); 
+
+   c = memchr (p, 'l', len);  // glibc version ok?
+   c = memchr (p, 'x', len); 
+
+   memcpy(u, p, len+1);       // glibc version ok?
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/strlen_bad.stderr.exp b/exp-ptrcheck/tests/strlen_bad.stderr.exp
new file mode 100644
index 0000000..5698989
--- /dev/null
+++ b/exp-ptrcheck/tests/strlen_bad.stderr.exp
@@ -0,0 +1,42 @@
+
+Invalid read of size 4
+   at 0x........: strlen (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is 12 bytes inside the accessing pointer's
+ legitimate range, a block of size 13 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: __strdup (in /...libc...)
+   by 0x........: main (strlen.c:14)
+
+Invalid read of size 4
+   at 0x........: strchr (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is 12 bytes inside the accessing pointer's
+ legitimate range, a block of size 13 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: __strdup (in /...libc...)
+   by 0x........: main (strlen.c:14)
+
+Invalid read of size 4
+   at 0x........: strrchr (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is 12 bytes inside the accessing pointer's
+ legitimate range, a block of size 13 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: __strdup (in /...libc...)
+   by 0x........: main (strlen.c:14)
+
+Invalid read of size 4
+   at 0x........: strrchr (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is 12 bytes inside the accessing pointer's
+ legitimate range, a block of size 13 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: __strdup (in /...libc...)
+   by 0x........: main (strlen.c:14)
+
+ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/strlen_bad.vgtest-disabled b/exp-ptrcheck/tests/strlen_bad.vgtest-disabled
new file mode 100644
index 0000000..9f41872
--- /dev/null
+++ b/exp-ptrcheck/tests/strlen_bad.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: strlen
+vgopts: --partial-loads-ok=no
diff --git a/exp-ptrcheck/tests/strlen_good.stderr.exp b/exp-ptrcheck/tests/strlen_good.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/strlen_good.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/strlen_good.vgtest-disabled b/exp-ptrcheck/tests/strlen_good.vgtest-disabled
new file mode 100644
index 0000000..14e36fa
--- /dev/null
+++ b/exp-ptrcheck/tests/strlen_good.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: strlen
+vgopts: --partial-loads-ok=yes
diff --git a/exp-ptrcheck/tests/sub.c b/exp-ptrcheck/tests/sub.c
new file mode 100644
index 0000000..73116b6
--- /dev/null
+++ b/exp-ptrcheck/tests/sub.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+int aaa(void) { int x = 0x66; return x; }
+int bbb(void) { return 0x55; }
+int main(void)
+{
+   #include "arith_include2.c"
+
+   // SUB =========================================================
+   g(-, n,  n2, n);  // det, det       // undet, undet?
+
+   g(-, n,  p,  e);  // det, det
+
+   g(-, p,  n,  p);  // ok, det
+
+   g(-, p,  pp, B);  // det, det
+
+   g(-, n,  un, u);  // undet, undet
+   g(-, n,  up, u);  // undet, undet
+
+   g(-, un, n,  u);  // undet, undet
+   g(-, up, n,  u);  // ok, undet
+
+   g(-, un, un2,u);  // det, det
+   g(-, un, up, u);  // undet, undet
+   g(-, up, un, u);  // ok, undet
+   g(-, up, up, u);  // det, det
+
+   g(-, un, p,  B);  // undet, undet
+   g(-, up, p,  B);  // undet, undet
+
+   g(-, p,  un, p);  // det, det
+   g(-, p,  up, p);  // det, det
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/sub.stderr.exp b/exp-ptrcheck/tests/sub.stderr.exp
new file mode 100644
index 0000000..ba007a1
--- /dev/null
+++ b/exp-ptrcheck/tests/sub.stderr.exp
@@ -0,0 +1,101 @@
+
+about to do 15 [0]
+Invalid read of size 4
+   at 0x........: main (sub.c:15)
+ Address 0x........ is not derived from any known block
+about to do 15 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:15)
+ Address 0x........ is not derived from any known block
+about to do 17 [0]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:17)
+ Address 0x........ is not derived from any known block
+about to do 17 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:17)
+ Address 0x........ is not derived from any known block
+about to do 19 [0]
+about to do 19 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:19)
+ Address 0x........ is 4 bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 21 [0]
+about to do 21 [-1]
+about to do 23 [0]
+about to do 23 [-1]
+about to do 24 [0]
+about to do 24 [-1]
+about to do 26 [0]
+about to do 26 [-1]
+about to do 27 [0]
+about to do 27 [-1]
+about to do 29 [0]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:29)
+ Address 0x........ is not derived from any known block
+about to do 29 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:29)
+ Address 0x........ is not derived from any known block
+about to do 30 [0]
+about to do 30 [-1]
+about to do 31 [0]
+about to do 31 [-1]
+about to do 32 [0]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:32)
+ Address 0x........ is not derived from any known block
+about to do 32 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:32)
+ Address 0x........ is not derived from any known block
+about to do 34 [0]
+about to do 34 [-1]
+about to do 35 [0]
+about to do 35 [-1]
+about to do 37 [0]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:37)
+ Address 0x........ is ... bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 37 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:37)
+ Address 0x........ is ... bytes before the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 38 [0]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:38)
+ Address 0x........ is ... bytes after the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+about to do 38 [-1]
+
+Invalid read of size 4
+   at 0x........: main (sub.c:38)
+ Address 0x........ is ... bytes after the accessing pointer's
+ legitimate range, a block of size 40 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (arith_include2.c:22)
+
+ERROR SUMMARY: 13 errors from 13 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/sub.vgtest-disabled b/exp-ptrcheck/tests/sub.vgtest-disabled
new file mode 100644
index 0000000..45d1aee
--- /dev/null
+++ b/exp-ptrcheck/tests/sub.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: sub
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/supp.c b/exp-ptrcheck/tests/supp.c
new file mode 100644
index 0000000..9d211b9
--- /dev/null
+++ b/exp-ptrcheck/tests/supp.c
@@ -0,0 +1,19 @@
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+int main(void)
+{
+   int   i = 11;
+   char* buf = malloc(sizeof(char) * 6);
+   char  c = buf[-1];                  // LoadStoreErr
+   char* x = buf + (long)buf;          // ArithErr
+   char* y = (char*)((long)buf * i);   // AsmErr
+   write(-1, buf+3, 5);                // SysParamErr
+
+   return x-y+c;
+}
diff --git a/exp-ptrcheck/tests/supp.stderr.exp b/exp-ptrcheck/tests/supp.stderr.exp
new file mode 100644
index 0000000..ef8f811
--- /dev/null
+++ b/exp-ptrcheck/tests/supp.stderr.exp
@@ -0,0 +1,4 @@
+
+Warning: invalid file descriptor -1 in syscall write()
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/supp.supp b/exp-ptrcheck/tests/supp.supp
new file mode 100644
index 0000000..9d3300a
--- /dev/null
+++ b/exp-ptrcheck/tests/supp.supp
@@ -0,0 +1,18 @@
+{
+   first-in-supp.supp
+   exp-ptrcheck:Heap
+   fun:main
+}
+{
+   second-in-supp.supp
+   exp-ptrcheck:Arith
+   fun:main
+}
+{
+   third-in-supp.supp
+   exp-ptrcheck:SysParam
+   write(buf)
+   fun:__GI___libc_write
+   fun:__libc_start_main
+   obj:*/annelid/tests/supp
+}
diff --git a/exp-ptrcheck/tests/supp.vgtest b/exp-ptrcheck/tests/supp.vgtest
new file mode 100644
index 0000000..c0d71e7
--- /dev/null
+++ b/exp-ptrcheck/tests/supp.vgtest
@@ -0,0 +1,2 @@
+vgopts: --suppressions=supp.supp
+prog: supp
diff --git a/exp-ptrcheck/tests/suppgen.stderr.exp b/exp-ptrcheck/tests/suppgen.stderr.exp
new file mode 100644
index 0000000..f85933b
--- /dev/null
+++ b/exp-ptrcheck/tests/suppgen.stderr.exp
@@ -0,0 +1,60 @@
+
+Invalid read of size 1
+   at 0x........: main (supp.c:13)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, the 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (supp.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+---- Print suppression ? --- [Return/N/n/Y/y/C/c] ---- {
+   <insert a suppression name here>
+   Annelid:LoadStore
+   fun:main
+   fun:__libc_start_main
+   obj:*/annelid/tests/supp
+}
+
+Invalid ADD
+   at 0x........: main (supp.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (supp.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+---- Print suppression ? --- [Return/N/n/Y/y/C/c] ---- {
+   <insert a suppression name here>
+   Annelid:Arith
+   fun:main
+   fun:__libc_start_main
+   obj:*/annelid/tests/supp
+}
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 3 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (supp.c:12)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte is not within a known block
+
+---- Print suppression ? --- [Return/N/n/Y/y/C/c] ---- {
+   <insert a suppression name here>
+   Annelid:SysParam
+   write(buf)
+   fun:__GI___libc_write
+   fun:__libc_start_main
+   obj:*/annelid/tests/supp
+}
+
+ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/suppgen.stdin b/exp-ptrcheck/tests/suppgen.stdin
new file mode 100644
index 0000000..b469f97
--- /dev/null
+++ b/exp-ptrcheck/tests/suppgen.stdin
@@ -0,0 +1,5 @@
+y
+y
+y
+y
+
diff --git a/exp-ptrcheck/tests/suppgen.vgtest-disabled b/exp-ptrcheck/tests/suppgen.vgtest-disabled
new file mode 100644
index 0000000..d606f2a
--- /dev/null
+++ b/exp-ptrcheck/tests/suppgen.vgtest-disabled
@@ -0,0 +1,4 @@
+prog: supp
+vgopts: --gen-suppressions=yes
+args: < suppgen.stdin
+stderr_filter: filter_suppgen
diff --git a/exp-ptrcheck/tests/syscall.c b/exp-ptrcheck/tests/syscall.c
new file mode 100644
index 0000000..b446275
--- /dev/null
+++ b/exp-ptrcheck/tests/syscall.c
@@ -0,0 +1,59 @@
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+
+// For some reason, the stack frame below __GI_write is disappearing.
+// Therefore, if I don't want the write errors to be merged, I have to
+// ensure they have a different stack trace.  I do this by using this
+// function.  Weird.
+__attribute__((noinline))
+void mywrite(char* buf, int len)
+{
+   write(-1, buf, len);
+}
+
+__attribute__((noinline))
+void mygetitimer(long arg1, struct itimerval* itval)
+{
+   getitimer(arg1, itval);
+}
+
+__attribute__((noinline))
+void myopen(char* name, long flags)
+{
+   open(name, flags);
+}
+
+int main(void)
+{
+   char *buf = malloc(sizeof(char)*6), *buf2 = malloc(sizeof(char)*6);
+   struct itimerval* itval = malloc(sizeof(struct itimerval) - 1);
+   int diff = buf2 - buf;
+   buf[0] = 'h';
+   buf[1] = 'e';
+   buf[2] = 'l';
+   buf[3] = 'l';
+   buf[4] = 'o';
+   buf[5] = 'x';
+
+   // error (read)  (will fail due to -1, as we want -- don't want any
+   // unpredictable output to foul up the test)
+   mywrite(buf+3, 5);      // error (read)
+   mywrite(buf-1, 5);      // error (read)
+   mywrite(buf+1, diff);   // error (read)
+   myopen(buf+3, 0x0);     // error (read_asciiz)
+
+   mygetitimer(0, itval);    // error (write)
+
+   //----
+   free(buf);
+   mywrite(buf,   5);      // error
+   mywrite(buf+3, 5);      // error
+   mywrite(buf+1, diff);   // error (read)
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/syscall.stderr.exp b/exp-ptrcheck/tests/syscall.stderr.exp
new file mode 100644
index 0000000..3443b29
--- /dev/null
+++ b/exp-ptrcheck/tests/syscall.stderr.exp
@@ -0,0 +1,109 @@
+
+Warning: invalid file descriptor -1 in syscall write()
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:32)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 3 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte is not within a known block
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:33)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte is not within a known block
+Last byte (0x........) is 3 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:34)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 1 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte (0x........) is 0 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Syscall param open(pathname) is non-contiguous
+   at 0x........: __libc_open (...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 3 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte is not within a known block
+
+Syscall param getitimer(timer) is non-contiguous
+   at 0x........: __getitimer (in /...libc...)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 0 bytes within a 15-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:21)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte is not within a known block
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) contains unaddressable byte(s)
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:41)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 0 bytes within a 6-byte block free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:40)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:42)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 3 bytes within a 6-byte block free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:40)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte is not within a known block
+Warning: invalid file descriptor -1 in syscall write()
+
+Syscall param write(buf) is non-contiguous
+   at 0x........: __libc_write (...libc...)
+   by 0x........: main (syscall.c:43)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+First byte (0x........) is 1 bytes within a 6-byte block free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:40)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Last byte (0x........) is 0 bytes within a 6-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (syscall.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+ERROR SUMMARY: 8 errors from 8 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/syscall.vgtest-disabled b/exp-ptrcheck/tests/syscall.vgtest-disabled
new file mode 100644
index 0000000..ea58a58
--- /dev/null
+++ b/exp-ptrcheck/tests/syscall.vgtest-disabled
@@ -0,0 +1 @@
+prog: syscall
diff --git a/exp-ptrcheck/tests/tricky.c b/exp-ptrcheck/tests/tricky.c
new file mode 100644
index 0000000..1ac8ad6
--- /dev/null
+++ b/exp-ptrcheck/tests/tricky.c
@@ -0,0 +1,18 @@
+
+#include <stdlib.h>
+
+int main(void)
+{
+   // When I had n-u --> u, this gave a false positive... can happen because
+   // p+up can give n if you are (un)lucky, because the result is close enough
+   // to zero.
+   int  u[20];
+   int* p = malloc(sizeof(int) * 100);
+
+   p[0] = 0;                           // ok
+   int* n = (int*)((long)p + (long)u); // result is n, because near zero!
+   int* x = (int*)((long)n - (long)u); // x == p
+   x[0] = 0;                           // ok, originally caused false pos.
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/tricky.stderr.exp b/exp-ptrcheck/tests/tricky.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/tricky.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/tricky.vgtest b/exp-ptrcheck/tests/tricky.vgtest
new file mode 100644
index 0000000..3f22f2b
--- /dev/null
+++ b/exp-ptrcheck/tests/tricky.vgtest
@@ -0,0 +1 @@
+prog: tricky
diff --git a/exp-ptrcheck/tests/unaligned.c b/exp-ptrcheck/tests/unaligned.c
new file mode 100644
index 0000000..b7c08b0
--- /dev/null
+++ b/exp-ptrcheck/tests/unaligned.c
@@ -0,0 +1,51 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main ( void )
+{
+   char* x = strdup("hello");
+   char c;
+   char c0[8], c1[8], c2[8], c3[8], c4[8];
+
+   // Each of these pointers has a different alignment
+   char** p0 = (char**)&c0[0]; *p0 = x;
+   char** p1 = (char**)&c1[1]; *p1 = x;
+   char** p2 = (char**)&c2[2]; *p2 = x;
+   char** p3 = (char**)&c3[3]; *p3 = x;
+   char** p4 = (char**)&c4[4]; *p4 = x;
+
+   // These 10 are ok
+   c = (*p0)[0];
+   c = (*p1)[0];
+   c = (*p2)[0];
+   c = (*p3)[0];
+   c = (*p4)[0];
+
+   c = (*p0)[5];
+   c = (*p1)[5];
+   c = (*p2)[5];
+   c = (*p3)[5];
+   c = (*p4)[5];
+
+   // These 10 are bad
+   c = (*p0)[-1];    // always word aligned, so det
+   c = (*p1)[-1];    // undet
+   c = (*p2)[-1];    // undet
+   c = (*p3)[-1];    // undet
+   c = (*p4)[-1];    // undet on 64-bit since not 64-bit aligned
+
+   c = (*p0)[6];     // always word aligned, so det
+   c = (*p1)[6];     // undet
+   c = (*p2)[6];     // undet
+   c = (*p3)[6];     // undet
+   c = (*p4)[6];     // undet on 64-bit since not 64-bit aligned
+
+   return 0;
+}
+
+/* What this program does: verifies that (unfortunately) if you store a
+   pointer misaligned, then the associated shadow value decays to Unknown,
+   and so when you retrieve the pointer later and dereference it, you
+   get no check :-( */
diff --git a/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-amd64 b/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-amd64
new file mode 100644
index 0000000..c2c4226
--- /dev/null
+++ b/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-amd64
@@ -0,0 +1,18 @@
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:33)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:39)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-x86 b/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-x86
new file mode 100644
index 0000000..fd09f58
--- /dev/null
+++ b/exp-ptrcheck/tests/unaligned.stderr.exp-glibc25-x86
@@ -0,0 +1,34 @@
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:33)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:37)
+ Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:39)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+Invalid read of size 1
+   at 0x........: main (unaligned.c:43)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 6 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: ...
+   by 0x........: main (unaligned.c:8)
+
+ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/unaligned.vgtest b/exp-ptrcheck/tests/unaligned.vgtest
new file mode 100644
index 0000000..e5076d2
--- /dev/null
+++ b/exp-ptrcheck/tests/unaligned.vgtest
@@ -0,0 +1 @@
+prog: unaligned
diff --git a/exp-ptrcheck/tests/x86/Makefile.am b/exp-ptrcheck/tests/x86/Makefile.am
new file mode 100644
index 0000000..3532e43
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/Makefile.am
@@ -0,0 +1,17 @@
+
+noinst_SCRIPTS = filter_stderr
+
+check_PROGRAMS = asm lea
+
+EXTRA_DIST = $(noinst_SCRIPTS) \
+	asm.vgtest-disabled asm.stderr.exp \
+	lea.vgtest-disabled lea.stderr.exp
+
+AM_CPPFLAGS  = -I$(top_srcdir)/include
+AM_CFLAGS    = $(WERROR) @FLAG_M32@ -Winline -Wall -Wshadow -g \
+		$(FLAG_MMMX) $(FLAG_MSSE)
+AM_CXXFLAGS  = $(AM_CFLAGS)
+AM_CCASFLAGS = @FLAG_M32@
+
+asm_SOURCES = asm.S
+lea_SOURCES = lea.S
diff --git a/exp-ptrcheck/tests/x86/asm.S b/exp-ptrcheck/tests/x86/asm.S
new file mode 100644
index 0000000..372adef
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/asm.S
@@ -0,0 +1,81 @@
+	.file	"asm.S"
+	.text
+.globl main
+	.type	main,@function
+main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$8, %esp
+	andl	$-16, %esp
+	movl	$0, %eax
+	subl	%eax, %esp
+
+        /* ----------------------------------------------------------------- */
+        /* Setup                                                             */
+        /* ----------------------------------------------------------------- */	
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	subl	$12, %esp
+	pushl	$1365
+	call	malloc
+	addl	$16, %esp
+	movl	%eax, %esi
+	movl	%eax, %edi
+	addl	$4,   %edi
+
+	rdtsc
+	cpuid
+
+	bsf	%eax, %ebx
+	bsf	%esi, %ebx	// error
+	bsr	%eax, %ebx
+	bsr	%esi, %ebx	// error
+
+	sahf
+	lahf
+
+	movl	%esi, %edx
+	shldl	$1, %eax, %ebx
+	shldl	$1, %edx, %ebx	// error
+	shldl	$1, %eax, %edx	// error
+
+	movl	%esi, %edx
+	shrdl	$1, %eax, %ebx
+	shrdl	$1, %edx, %ebx	// error
+	shrdl	$1, %eax, %edx	// error
+
+	movw	%si, %dx
+	shldw	$1, %ax, %bx
+	shldw	$1, %ax, %si
+	shldw	$1, %si, %bx
+
+	movw	%si, %dx
+	shrdw	$1, %ax, %bx
+	shrdw	$1, %ax, %si
+	shrdw	$1, %si, %bx
+
+        fstsw	%ax
+	fstsw	(%esp)		// ok (I get seg faults if I use %esi...)
+
+	movl	$1, %ecx
+//        movsd			// for get_dirflag
+
+        /* ----------------------------------------------------------------- */
+        /* End                                                               */
+        /* ----------------------------------------------------------------- */
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+
+	movl	$0, %eax
+	xorw	%ax, %ax
+	leave
+	ret
+.Lfe1:
+	.size	main,.Lfe1-main
+        .comm   foo,24,4
+	.ident	"GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
+
+
diff --git a/exp-ptrcheck/tests/x86/asm.stderr.exp b/exp-ptrcheck/tests/x86/asm.stderr.exp
new file mode 100644
index 0000000..d18786f
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/asm.stderr.exp
@@ -0,0 +1,3 @@
+
+
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/x86/asm.vgtest-disabled b/exp-ptrcheck/tests/x86/asm.vgtest-disabled
new file mode 100644
index 0000000..34c2a22
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/asm.vgtest-disabled
@@ -0,0 +1 @@
+prog: asm
diff --git a/exp-ptrcheck/tests/x86/filter_stderr b/exp-ptrcheck/tests/x86/filter_stderr
new file mode 100755
index 0000000..0ae9313
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/filter_stderr
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+../filter_stderr
diff --git a/exp-ptrcheck/tests/x86/lea.S b/exp-ptrcheck/tests/x86/lea.S
new file mode 100644
index 0000000..2d6e4f8
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/lea.S
@@ -0,0 +1,85 @@
+	.file	"lea.S"
+	.text
+.globl main
+	.type	main,@function
+main:
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$8, %esp
+	andl	$-16, %esp
+	movl	$0, %eax
+	subl	%eax, %esp
+
+        /* ----------------------------------------------------------------- */
+        /* Setup                                                             */
+        /* ----------------------------------------------------------------- */	
+	pushl	%esi
+
+	subl	$12, %esp
+	pushl	$1365
+	call	malloc
+	addl	$16, %esp
+	movl	%eax, %esi
+
+	movl    $foo, %ecx              /* %ecx = foo */
+
+        xorl    %edx, %edx              /* %edx = 0 */
+
+        /* ----------------------------------------------------------------- */
+        /* Start                                                             */
+        /* ----------------------------------------------------------------- */
+        /* At one point, for LEA1 result I was just using t1.vseg.  This
+         * is too simplistic, because sometimes the offset can be a static data
+         * pointer, so if t1 is a known non-ptr, the result should not be a
+         * non-ptr, but unknown.  Before I fixed it, the following instruction
+         * would have caused this warning:
+         *
+         *   ==5234== Invalid read
+         *   ==5234==    at 0x8048306: main (lea.S:21)
+         *   ==5234==    by 0x8048264: ??? (start.S:81)
+         *   ==5234== Address 0x80494CC is not derived from any heap block
+         */
+        movl    foo(%edx), %eax         /* ?(n) case */
+
+        /* Do a similar check with LEA2 */
+        movl    foo(%edx,%edx,1), %eax
+
+        /* ok -------------------------------------------------------------- */
+        movl    0(%ecx,%edx,1), %eax	// ok
+        movl    0(%edx,%ecx,1), %eax	// ok
+        movl    0(%esi,%edx,1), %eax	// ok
+        movl    0(%edx,%esi,1), %eax	// ok
+
+        movl    0(%ecx,%edx,2), %eax	// ok
+        movl    0(%esi,%edx,2), %eax	// ok
+
+        /* bad ------------------------------------------------------------- */
+        movl    -1(%ecx,%edx,1), %eax	// undet
+        movl    -1(%edx,%ecx,1), %eax	// undet
+        movl    -1(%esi,%edx,1), %eax	// det
+        movl    -1(%edx,%esi,1), %eax	// det
+
+        movl    -1(%ecx,%edx,2), %eax	// undet
+        movl    -1(%esi,%edx,2), %eax	// det
+
+	/* bad (type error) ------------------------------------------------ */
+	leal    0(%esi,%esi,1),	%eax	// adding two pointers
+
+        /* ----------------------------------------------------------------- */
+        /* Scale known pointer by 2.  */
+        leal    0(%edx,%esi,2), %eax  	/* result is pointer */
+
+        /* ----------------------------------------------------------------- */
+        /* End                                                               */
+        /* ----------------------------------------------------------------- */
+	popl	%esi
+
+	movl	$0, %eax
+	leave
+	ret
+.Lfe1:
+	.size	main,.Lfe1-main
+        .comm   foo,24,4
+	.ident	"GCC: (GNU) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)"
+
+
diff --git a/exp-ptrcheck/tests/x86/lea.stderr.exp b/exp-ptrcheck/tests/x86/lea.stderr.exp
new file mode 100644
index 0000000..944ac2b
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/lea.stderr.exp
@@ -0,0 +1,47 @@
+
+Invalid read of size 4
+   at 0x........: main (lea.S:59)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, the 1365-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (lea.S:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid read of size 4
+   at 0x........: main (lea.S:60)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, the 1365-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (lea.S:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid read of size 4
+   at 0x........: main (lea.S:63)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Address 0x........ is 1 bytes before the accessing pointer's
+ legitimate range, the 1365-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (lea.S:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Invalid LEA2
+   at 0x........: main (lea.S:66)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+Both args derived from address 0x........ of 1365-byte block alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (lea.S:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+
+Scaling known pointer by value > 1 in lea instruction
+
+ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/x86/lea.vgtest-disabled b/exp-ptrcheck/tests/x86/lea.vgtest-disabled
new file mode 100644
index 0000000..e47ec32
--- /dev/null
+++ b/exp-ptrcheck/tests/x86/lea.vgtest-disabled
@@ -0,0 +1 @@
+prog: lea
diff --git a/exp-ptrcheck/tests/xor.c b/exp-ptrcheck/tests/xor.c
new file mode 100644
index 0000000..7e3bade
--- /dev/null
+++ b/exp-ptrcheck/tests/xor.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <assert.h>
+
+#include "arith_include1.c"
+
+int main(void)
+{
+   #include "arith_include2.c"
+   
+   // XOR =========================================================
+   g(^, n,  n,  n);  // det, det
+
+   g(^, n,  p,  u);  // ok, undet
+
+   g(^, p,  n,  u);  // ok, undet
+
+   g(^, p,  p,  n);  // det, det
+
+   g(^, n,  un, u);  // undet, undet
+   g(^, n,  up, u);  // ok, undet
+
+   g(^, un, n,  u);  // undet, undet
+   g(^, up, n,  u);  // ok, undet
+
+   g(^, un, un, n);  // det, det (range)
+   g(^, un, up, u);  // ok, undet
+   g(^, up, un, u);  // ok, undet
+   g(^, up, up, n);  // det, det
+
+   g(^, un, p,  u);  // ok, undet
+   g(^, up, p,  u);  // undet, undet
+
+   g(^, p,  un, u);  // ok, undet
+   g(^, p,  up, u);  // undet, undet
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/xor.stderr.exp b/exp-ptrcheck/tests/xor.stderr.exp
new file mode 100644
index 0000000..e69457f
--- /dev/null
+++ b/exp-ptrcheck/tests/xor.stderr.exp
@@ -0,0 +1,82 @@
+
+about to do 14 [0]
+Invalid read of size 4
+   at 0x........: main (xor.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 14 [-1]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:14)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 16 [0]
+about to do 16 [-1]
+about to do 18 [0]
+about to do 18 [-1]
+about to do 20 [0]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 20 [-1]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:20)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 22 [0]
+about to do 22 [-1]
+about to do 23 [0]
+about to do 23 [-1]
+about to do 25 [0]
+about to do 25 [-1]
+about to do 26 [0]
+about to do 26 [-1]
+about to do 28 [0]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 28 [-1]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:28)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 29 [0]
+about to do 29 [-1]
+about to do 30 [0]
+about to do 30 [-1]
+about to do 31 [0]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 31 [-1]
+
+Invalid read of size 4
+   at 0x........: main (xor.c:31)
+   by 0x........: __libc_start_main (...libc...)
+   by 0x........: ...
+ Address 0x........ is not derived from any known block
+about to do 33 [0]
+about to do 33 [-1]
+about to do 34 [0]
+about to do 34 [-1]
+about to do 36 [0]
+about to do 36 [-1]
+about to do 37 [0]
+about to do 37 [-1]
+
+ERROR SUMMARY: 8 errors from 8 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/xor.vgtest-disabled b/exp-ptrcheck/tests/xor.vgtest-disabled
new file mode 100644
index 0000000..8d60202
--- /dev/null
+++ b/exp-ptrcheck/tests/xor.vgtest-disabled
@@ -0,0 +1,2 @@
+prog: xor
+stderr_filter: filter_add
diff --git a/exp-ptrcheck/tests/zero.c b/exp-ptrcheck/tests/zero.c
new file mode 100644
index 0000000..adea4d6
--- /dev/null
+++ b/exp-ptrcheck/tests/zero.c
@@ -0,0 +1,24 @@
+
+
+#include <stdlib.h>
+#include <assert.h>
+
+int main ( void )
+{
+   char c, *c0 = malloc(0), *c1;
+
+   c = *c0;    // bad
+
+   c0 = realloc(c0, 10);
+   assert(c0);
+
+   c = *c0;    // ok
+
+   c1 = c0;
+   c0 = realloc(c0, 0);
+   assert(!c0);
+
+   c = *c1;    // bad, dangling
+
+   return 0;
+}
diff --git a/exp-ptrcheck/tests/zero.stderr.exp b/exp-ptrcheck/tests/zero.stderr.exp
new file mode 100644
index 0000000..f4509e1
--- /dev/null
+++ b/exp-ptrcheck/tests/zero.stderr.exp
@@ -0,0 +1,17 @@
+
+Invalid read of size 1
+   at 0x........: main (zero.c:10)
+ Address 0x........ is 0 bytes after the accessing pointer's
+ legitimate range, a block of size 0 alloc'd
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (zero.c:8)
+
+Invalid read of size 1
+   at 0x........: main (zero.c:21)
+ Address 0x........ is 0 bytes inside the accessing pointer's
+ once-legitimate range, a block of size 10 free'd
+   at 0x........: free (vg_replace_malloc.c:...)
+   by 0x........: realloc (vg_replace_malloc.c:...)
+   by 0x........: main (zero.c:18)
+
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/exp-ptrcheck/tests/zero.vgtest b/exp-ptrcheck/tests/zero.vgtest
new file mode 100644
index 0000000..d5af1b4
--- /dev/null
+++ b/exp-ptrcheck/tests/zero.vgtest
@@ -0,0 +1 @@
+prog: zero