Merge all remaining changes from branches/PTRCHECK.  These are some
relatively minor extensions to m_debuginfo, a major overhaul of
m_debuginfo/readdwarf3.c to get its space usage under control, and
changes throughout the system to enable heap-use profiling.

The majority of the merged changes were committed into
branches/PTRCHECK as the following revs: 8591 8595 8598 8599 8601 and
8161.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8621 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/Makefile.am b/Makefile.am
index 0542488..0707f0a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -12,7 +12,8 @@
 		helgrind \
 		drd
 
-EXP_TOOLS = 	exp-omega
+EXP_TOOLS = 	exp-omega \
+		exp-ptrcheck
 
 # Put docs last because building the HTML is slow and we want to get
 # everything else working before we try it.
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index ac4771b..a30ba75 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -196,7 +196,7 @@
       return *s_ptr;
    } else {
       Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
-      *s_node = VG_(strdup)(s);
+      *s_node = VG_(strdup)("cg.main.gps.1", s);
       VG_(OSetGen_Insert)(stringTable, s_node);
       return *s_node;
    }
@@ -1762,15 +1762,18 @@
    CC_table =
       VG_(OSetGen_Create)(offsetof(LineCC, loc),
                           cmp_CodeLoc_LineCC,
-                          VG_(malloc), VG_(free));
+                          VG_(malloc), "cg.main.cpci.1",
+                          VG_(free));
    instrInfoTable =
       VG_(OSetGen_Create)(/*keyOff*/0,
                           NULL,
-                          VG_(malloc), VG_(free));
+                          VG_(malloc), "cg.main.cpci.2",
+                          VG_(free));
    stringTable =
       VG_(OSetGen_Create)(/*keyOff*/0,
                           stringCmp,
-                          VG_(malloc), VG_(free));
+                          VG_(malloc), "cg.main.cpci.3",
+                          VG_(free));
 
    configure_caches(&I1c, &D1c, &L2c);
 
diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
index ddf50b4..6edf126 100644
--- a/cachegrind/cg_sim.c
+++ b/cachegrind/cg_sim.c
@@ -74,7 +74,8 @@
                                  c->size, c->line_size, c->assoc);
    }
 
-   c->tags = VG_(malloc)(sizeof(UWord) * c->sets * c->assoc);
+   c->tags = VG_(malloc)("cg.sim.ci.1",
+                         sizeof(UWord) * c->sets * c->assoc);
 
    for (i = 0; i < c->sets * c->assoc; i++)
       c->tags[i] = 0;
diff --git a/callgrind/bb.c b/callgrind/bb.c
index 0ccc63f..7939cae 100644
--- a/callgrind/bb.c
+++ b/callgrind/bb.c
@@ -41,7 +41,8 @@
 
    bbs.size    = 8437;
    bbs.entries = 0;
-   bbs.table = (BB**) CLG_MALLOC(bbs.size * sizeof(BB*));
+   bbs.table = (BB**) CLG_MALLOC("cl.bb.ibh.1",
+                                 bbs.size * sizeof(BB*));
 
    for (i = 0; i < bbs.size; i++) bbs.table[i] = NULL;
 }
@@ -70,7 +71,8 @@
     UInt new_idx;
 
     new_size  = 2* bbs.size +3;
-    new_table = (BB**) CLG_MALLOC(new_size * sizeof(BB*));
+    new_table = (BB**) CLG_MALLOC("cl.bb.rbt.1",
+                                  new_size * sizeof(BB*));
  
     if (!new_table) return;
  
@@ -129,7 +131,7 @@
 
    size = sizeof(BB) + instr_count * sizeof(InstrInfo)
                      + (cjmp_count+1) * sizeof(CJmpInfo);
-   new = (BB*) CLG_MALLOC(size);
+   new = (BB*) CLG_MALLOC("cl.bb.nb.1", size);
    VG_(memset)(new, 0, size);
 
    new->obj        = obj;
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
index c564266..554c1ed 100644
--- a/callgrind/bbcc.c
+++ b/callgrind/bbcc.c
@@ -48,7 +48,8 @@
 
    bbccs->size    = N_BBCC_INITIAL_ENTRIES;
    bbccs->entries = 0;
-   bbccs->table = (BBCC**) CLG_MALLOC(bbccs->size * sizeof(BBCC*));
+   bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
+                                      bbccs->size * sizeof(BBCC*));
 
    for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
 }
@@ -197,7 +198,8 @@
     BBCC *curr_BBCC, *next_BBCC;
 
     new_size = 2*current_bbccs.size+3;
-    new_table = (BBCC**) CLG_MALLOC(new_size * sizeof(BBCC*));
+    new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
+                                    new_size * sizeof(BBCC*));
  
     if (!new_table) return;
  
@@ -246,7 +248,7 @@
     BBCC** bbccs;
     int i;
     
-    bbccs = (BBCC**) CLG_MALLOC(sizeof(BBCC*) * size);
+    bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
     for(i=0;i<size;i++)
 	bbccs[i] = 0;
 
@@ -271,7 +273,8 @@
    /* We need cjmp_count+1 JmpData structs:
     * the last is for the unconditional jump/call/ret at end of BB
     */
-   new = (BBCC*)CLG_MALLOC(sizeof(BBCC) +
+   new = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
+                           sizeof(BBCC) +
 			   (bb->cjmp_count+1) * sizeof(JmpData));
    new->bb  = bb;
    new->tid = CLG_(current_tid);
diff --git a/callgrind/callstack.c b/callgrind/callstack.c
index 89f5cea..d89b53e 100644
--- a/callgrind/callstack.c
+++ b/callgrind/callstack.c
@@ -52,7 +52,8 @@
   CLG_ASSERT(s != 0);
 
   s->size = N_CALL_STACK_INITIAL_ENTRIES;   
-  s->entry = (call_entry*) CLG_MALLOC(s->size * sizeof(call_entry));
+  s->entry = (call_entry*) CLG_MALLOC("cl.callstack.ics.1",
+                                      s->size * sizeof(call_entry));
   s->sp = 0;
   s->entry[0].cxt = 0; /* for assertion in push_cxt() */
 
@@ -96,7 +97,8 @@
   cs->size *= 2;
   while (i > cs->size) cs->size *= 2;
 
-  cs->entry = (call_entry*) VG_(realloc)(cs->entry,
+  cs->entry = (call_entry*) VG_(realloc)("cl.callstack.ess.1",
+                                         cs->entry,
 					 cs->size * sizeof(call_entry));
 
   for(i=oldsize; i<cs->size; i++)
diff --git a/callgrind/clo.c b/callgrind/clo.c
index 5e5972b..1d21a65 100644
--- a/callgrind/clo.c
+++ b/callgrind/clo.c
@@ -98,7 +98,8 @@
 static __inline__ 
 fn_config* new_fnc(void)
 {
-   fn_config* new = (fn_config*) CLG_MALLOC(sizeof(fn_config));
+   fn_config* new = (fn_config*) CLG_MALLOC("cl.clo.nf.1",
+                                            sizeof(fn_config));
 
    new->dump_before  = CONFIG_DEFAULT;
    new->dump_after   = CONFIG_DEFAULT;
@@ -121,7 +122,8 @@
 static config_node* new_config(Char* name, int length)
 {
     int i;
-    config_node* node = (config_node*) CLG_MALLOC(sizeof(config_node) + length);
+    config_node* node = (config_node*) CLG_MALLOC("cl.clo.nc.1",
+                                                  sizeof(config_node) + length);
 
     for(i=0;i<length;i++) {
       if (name[i] == 0) break;
@@ -588,7 +590,7 @@
    }
 
    else if (0 == VG_(strncmp)(arg, "--callgrind-out-file=", 21))
-       CLG_(clo).out_format = VG_(strdup)(arg+21);
+       CLG_(clo).out_format = VG_(strdup)("cl.clo.pclo.1", arg+21);
 
    else if (0 == VG_(strcmp)(arg, "--mangle-names=yes"))
        CLG_(clo).mangle_names = True;
diff --git a/callgrind/command.c b/callgrind/command.c
index c453cba..ba9bbd3 100644
--- a/callgrind/command.c
+++ b/callgrind/command.c
@@ -67,7 +67,7 @@
 
   /* name of command file */
   size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_COMMANDNAME) +10;
-  command_file = (char*) CLG_MALLOC(size);
+  command_file = (char*) CLG_MALLOC("cl.command.sc.1", size);
   CLG_ASSERT(command_file != 0);
   VG_(sprintf)(command_file, "%s/%s.%d",
 	       dir, DEFAULT_COMMANDNAME, thisPID);
@@ -76,13 +76,13 @@
    * KCachegrind releases, as it doesn't use ".pid" to distinguish
    * different callgrind instances from same base directory.
    */
-  command_file2 = (char*) CLG_MALLOC(size);
+  command_file2 = (char*) CLG_MALLOC("cl.command.sc.2", size);
   CLG_ASSERT(command_file2 != 0);
   VG_(sprintf)(command_file2, "%s/%s",
 	       dir, DEFAULT_COMMANDNAME);
 
   size = VG_(strlen)(dir) + VG_(strlen)(DEFAULT_RESULTNAME) +10;
-  result_file = (char*) CLG_MALLOC(size);
+  result_file = (char*) CLG_MALLOC("cl.command.sc.3", size);
   CLG_ASSERT(result_file != 0);
   VG_(sprintf)(result_file, "%s/%s.%d",
 	       dir, DEFAULT_RESULTNAME, thisPID);
@@ -90,12 +90,13 @@
   /* If we get a command from a command file without .pid, use
    * a result file without .pid suffix
    */
-  result_file2 = (char*) CLG_MALLOC(size);
+  result_file2 = (char*) CLG_MALLOC("cl.command.sc.4", size);
   CLG_ASSERT(result_file2 != 0);
   VG_(sprintf)(result_file2, "%s/%s",
                dir, DEFAULT_RESULTNAME);
 
-  info_file = (char*) CLG_MALLOC(VG_(strlen)(DEFAULT_INFONAME) + 10);
+  info_file = (char*) CLG_MALLOC("cl.command.sc.5",
+                                 VG_(strlen)(DEFAULT_INFONAME) + 10);
   CLG_ASSERT(info_file != 0);
   VG_(sprintf)(info_file, "%s.%d", DEFAULT_INFONAME, thisPID);
 
diff --git a/callgrind/context.c b/callgrind/context.c
index 2ff0fcd..0616c67 100644
--- a/callgrind/context.c
+++ b/callgrind/context.c
@@ -43,7 +43,8 @@
   CLG_ASSERT(s != 0);
 
   s->size   = N_FNSTACK_INITIAL_ENTRIES;   
-  s->bottom = (fn_node**) CLG_MALLOC(s->size * sizeof(fn_node*));
+  s->bottom = (fn_node**) CLG_MALLOC("cl.context.ifs.1",
+                                     s->size * sizeof(fn_node*));
   s->top    = s->bottom;
   s->bottom[0] = 0;
 }
@@ -74,7 +75,8 @@
    
    cxts.size    = N_CXT_INITIAL_ENTRIES;
    cxts.entries = 0;
-   cxts.table   = (Context**) CLG_MALLOC(cxts.size * sizeof(Context*));
+   cxts.table   = (Context**) CLG_MALLOC("cl.context.ict.1",
+                                         cxts.size * sizeof(Context*));
 
    for (i = 0; i < cxts.size; i++)
      cxts.table[i] = 0;
@@ -93,7 +95,8 @@
     UInt new_idx;
 
     new_size  = 2* cxts.size +3;
-    new_table = (Context**) CLG_MALLOC(new_size * sizeof(Context*));
+    new_table = (Context**) CLG_MALLOC("cl.context.rct.1",
+                                       new_size * sizeof(Context*));
 
     if (!new_table) return;
 
@@ -190,7 +193,8 @@
     if (10 * cxts.entries / cxts.size > 8)
         resize_cxt_table();
 
-    new = (Context*) CLG_MALLOC(sizeof(Context)+sizeof(fn_node*)*size);
+    new = (Context*) CLG_MALLOC("cl.context.nc.1",
+                                sizeof(Context)+sizeof(fn_node*)*size);
 
     // hash value calculation similar to cxt_hash_val(), but additionally
     // copying function pointers in one run
@@ -298,7 +302,8 @@
   fn_entries = CLG_(current_fn_stack).top - CLG_(current_fn_stack).bottom;
   if (fn_entries == CLG_(current_fn_stack).size-1) {
     int new_size = CLG_(current_fn_stack).size *2;
-    fn_node** new = (fn_node**) CLG_MALLOC(new_size * sizeof(fn_node*));
+    fn_node** new = (fn_node**) CLG_MALLOC("cl.context.pc.1",
+                                           new_size * sizeof(fn_node*));
     int i;
     for(i=0;i<CLG_(current_fn_stack).size;i++)
       new[i] = CLG_(current_fn_stack).bottom[i];
diff --git a/callgrind/costs.c b/callgrind/costs.c
index fb13280..63e1e29 100644
--- a/callgrind/costs.c
+++ b/callgrind/costs.c
@@ -43,7 +43,8 @@
 
   if (!cost_chunk_current ||
       (cost_chunk_current->size - cost_chunk_current->used < size)) {
-    CostChunk* cc  = (CostChunk*) CLG_MALLOC(sizeof(CostChunk) +
+    CostChunk* cc  = (CostChunk*) CLG_MALLOC("cl.costs.gc.1",
+                                              sizeof(CostChunk) +
 					      COSTCHUNK_SIZE * sizeof(ULong));
     cc->size = COSTCHUNK_SIZE;
     cc->used = 0;
diff --git a/callgrind/debug.c b/callgrind/debug.c
index 43854b7..f9bb820 100644
--- a/callgrind/debug.c
+++ b/callgrind/debug.c
@@ -429,10 +429,10 @@
   VG_(printf)("\n");
 }
 
-void* CLG_(malloc)(UWord s, char* f)
+void* CLG_(malloc)(HChar* cc, UWord s, char* f)
 {
     CLG_DEBUG(3, "Malloc(%lu) in %s.\n", s, f);
-    return VG_(malloc)(s);
+    return VG_(malloc)(cc,s);
 }
 
 #else /* CLG_ENABLE_DEBUG */
diff --git a/callgrind/dump.c b/callgrind/dump.c
index 3a91972..6fda459 100644
--- a/callgrind/dump.c
+++ b/callgrind/dump.c
@@ -105,7 +105,8 @@
       CLG_(stat).distinct_fns +
       CLG_(stat).context_counter;
     CLG_ASSERT(dump_array == 0);
-    dump_array = (Bool*) CLG_MALLOC(dump_array_size * sizeof(Bool));
+    dump_array = (Bool*) CLG_MALLOC("cl.dump.ida.1",
+                                    dump_array_size * sizeof(Bool));
     obj_dumped  = dump_array;
     file_dumped = obj_dumped + CLG_(stat).distinct_objs;
     fn_dumped   = file_dumped + CLG_(stat).distinct_files;
@@ -1218,7 +1219,8 @@
 
     /* allocate bbcc array, insert BBCCs and sort */
     prepare_ptr = array =
-      (BBCC**) CLG_MALLOC((prepare_count+1) * sizeof(BBCC*));    
+      (BBCC**) CLG_MALLOC("cl.dump.pd.1",
+                          (prepare_count+1) * sizeof(BBCC*));    
 
     CLG_(forall_bbccs)(hash_addPtr);
 
@@ -1693,12 +1695,13 @@
        i++;
    }
    i = lastSlash;
-   out_directory = (Char*) CLG_MALLOC(i+1);
+   out_directory = (Char*) CLG_MALLOC("cl.dump.init_dumps.1", i+1);
    VG_(strncpy)(out_directory, out_file, i);
    out_directory[i] = 0;
 
    /* allocate space big enough for final filenames */
-   filename = (Char*) CLG_MALLOC(VG_(strlen)(out_file)+32);
+   filename = (Char*) CLG_MALLOC("cl.dump.init_dumps.2",
+                                 VG_(strlen)(out_file)+32);
    CLG_ASSERT(filename != 0);
        
    /* Make sure the output base file can be written.
diff --git a/callgrind/events.c b/callgrind/events.c
index 0b62b4f..3f6abbb 100644
--- a/callgrind/events.c
+++ b/callgrind/events.c
@@ -46,7 +46,7 @@
 
   et = &(eventtype[eventtype_count]);
   et->id = eventtype_count; 
-  et->name = (UChar*) VG_(strdup)(name);
+  et->name = (UChar*) VG_(strdup)("cl.events.re.1", name);
   et->description = 0;
 
   eventtype_count++;
@@ -77,7 +77,8 @@
 {
   EventSet* es;
 
-  es = (EventSet*) CLG_MALLOC(sizeof(EventSet) +
+  es = (EventSet*) CLG_MALLOC("cl.events.geSet.1",
+                               sizeof(EventSet) +
 			       capacity * sizeof(EventSetEntry));
   es->capacity = capacity;
   es->size = 0;
@@ -499,7 +500,8 @@
 
   CLG_ASSERT(es != 0);
 
-  em = (EventMapping*) CLG_MALLOC(sizeof(EventMapping) +
+  em = (EventMapping*) CLG_MALLOC("cl.events.geMapping.1",
+                                   sizeof(EventMapping) +
 				   es->capacity * sizeof(Int));
   em->capacity = es->capacity;
   em->size = 0;
diff --git a/callgrind/fn.c b/callgrind/fn.c
index 66389b0..74f9188 100644
--- a/callgrind/fn.c
+++ b/callgrind/fn.c
@@ -186,8 +186,8 @@
    Int i;
    obj_node* new;
 
-   new = (obj_node*) CLG_MALLOC(sizeof(obj_node));
-   new->name  = di ? VG_(strdup)( VG_(seginfo_filename)(di) )
+   new = (obj_node*) CLG_MALLOC("cl.fn.non.1", sizeof(obj_node));
+   new->name  = di ? VG_(strdup)( "cl.fn.non.2",VG_(seginfo_filename)(di) )
                      : anonymous_obj;
    for (i = 0; i < N_FILE_ENTRIES; i++) {
       new->files[i] = NULL;
@@ -244,8 +244,9 @@
 			 obj_node* obj, file_node* next)
 {
   Int i;
-  file_node* new = (file_node*) CLG_MALLOC(sizeof(file_node));
-  new->name  = VG_(strdup)(filename);
+  file_node* new = (file_node*) CLG_MALLOC("cl.fn.nfn.1",
+                                           sizeof(file_node));
+  new->name  = VG_(strdup)("cl.fn.nfn.2", filename);
   for (i = 0; i < N_FN_ENTRIES; i++) {
     new->fns[i] = NULL;
   }
@@ -286,8 +287,9 @@
 fn_node* new_fn_node(Char fnname[FILENAME_LEN],
 		     file_node* file, fn_node* next)
 {
-    fn_node* new = (fn_node*) CLG_MALLOC(sizeof(fn_node));
-    new->name = VG_(strdup)(fnname);
+    fn_node* new = (fn_node*) CLG_MALLOC("cl.fn.nfnnd.1",
+                                         sizeof(fn_node));
+    new->name = VG_(strdup)("cl.fn.nfnnd.2", fnname);
 
     CLG_(stat).distinct_fns++;
     new->number   = CLG_(stat).distinct_fns;
@@ -574,7 +576,8 @@
   if (a->size <= CLG_(stat).distinct_fns)
     a->size = CLG_(stat).distinct_fns+1;
   
-  a->array = (UInt*) CLG_MALLOC(a->size * sizeof(UInt));
+  a->array = (UInt*) CLG_MALLOC("cl.fn.gfe.1",
+                                a->size * sizeof(UInt));
   for(i=0;i<a->size;i++)
     a->array[i] = 0;
 }
@@ -617,7 +620,7 @@
     CLG_DEBUG(0, "Resize fn_active_array: %d => %d\n",
 	     current_fn_active.size, newsize);
 
-    new = (UInt*) CLG_MALLOC(newsize * sizeof(UInt));
+    new = (UInt*) CLG_MALLOC("cl.fn.rfa.1", newsize * sizeof(UInt));
     for(i=0;i<current_fn_active.size;i++)
       new[i] = current_fn_active.array[i];
     while(i<newsize)
diff --git a/callgrind/global.h b/callgrind/global.h
index 90f151e..b6a0334 100644
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -868,14 +868,14 @@
 void CLG_(print_addr)(Addr addr);
 void CLG_(print_addr_ln)(Addr addr);
 
-void* CLG_(malloc)(UWord s, char* f);
+void* CLG_(malloc)(HChar* cc, UWord s, char* f);
 void* CLG_(free)(void* p, char* f);
 #if 0
-#define CLG_MALLOC(x) CLG_(malloc)(x,__FUNCTION__)
-#define CLG_FREE(p)   CLG_(free)(p,__FUNCTION__)
+#define CLG_MALLOC(_cc,x) CLG_(malloc)((_cc),x,__FUNCTION__)
+#define CLG_FREE(p)       CLG_(free)(p,__FUNCTION__)
 #else
-#define CLG_MALLOC(x) VG_(malloc)(x)
-#define CLG_FREE(p)   VG_(free)(p)
+#define CLG_MALLOC(_cc,x) VG_(malloc)((_cc),x)
+#define CLG_FREE(p)       VG_(free)(p)
 #endif
 
 #endif /* CLG_GLOBAL */
diff --git a/callgrind/jumps.c b/callgrind/jumps.c
index 1549c4f..425e22a 100644
--- a/callgrind/jumps.c
+++ b/callgrind/jumps.c
@@ -46,7 +46,8 @@
 
    jccs->size    = N_JCC_INITIAL_ENTRIES;
    jccs->entries = 0;
-   jccs->table = (jCC**) CLG_MALLOC(jccs->size * sizeof(jCC*));
+   jccs->table = (jCC**) CLG_MALLOC("cl.jumps.ijh.1",
+                                    jccs->size * sizeof(jCC*));
    jccs->spontaneous = 0;
 
    for (i = 0; i < jccs->size; i++)
@@ -89,7 +90,8 @@
     jCC *curr_jcc, *next_jcc;
 
     new_size  = 2* current_jccs.size +3;
-    new_table = (jCC**) CLG_MALLOC(new_size * sizeof(jCC*));
+    new_table = (jCC**) CLG_MALLOC("cl.jumps.rjt.1",
+                                   new_size * sizeof(jCC*));
  
     if (!new_table) return;
  
@@ -145,7 +147,7 @@
    if (10 * current_jccs.entries / current_jccs.size > 8)
        resize_jcc_table();
 
-   new = (jCC*) CLG_MALLOC(sizeof(jCC));
+   new = (jCC*) CLG_MALLOC("cl.jumps.nj.1", sizeof(jCC));
 
    new->from      = from;
    new->jmp       = jmp;
diff --git a/callgrind/sim.c b/callgrind/sim.c
index 3d9ae6c..506ed9e 100644
--- a/callgrind/sim.c
+++ b/callgrind/sim.c
@@ -214,7 +214,8 @@
 		   c->sectored ? ", sectored":"");
    }
 
-   c->tags = (UWord*) CLG_MALLOC(sizeof(UWord) * c->sets * c->assoc);
+   c->tags = (UWord*) CLG_MALLOC("cl.sim.cs_ic.1",
+                                 sizeof(UWord) * c->sets * c->assoc);
    if (clo_collect_cacheuse)
        cacheuse_initcache(c);
    else
@@ -611,12 +612,15 @@
     unsigned int start_mask, start_val;
     unsigned int end_mask, end_val;
 
-    c->use    = CLG_MALLOC(sizeof(line_use) * c->sets * c->assoc);
-    c->loaded = CLG_MALLOC(sizeof(line_loaded) * c->sets * c->assoc);
-    c->line_start_mask = CLG_MALLOC(sizeof(int) * c->line_size);
-    c->line_end_mask = CLG_MALLOC(sizeof(int) * c->line_size);
+    c->use    = CLG_MALLOC("cl.sim.cu_ic.1",
+                           sizeof(line_use) * c->sets * c->assoc);
+    c->loaded = CLG_MALLOC("cl.sim.cu_ic.2",
+                           sizeof(line_loaded) * c->sets * c->assoc);
+    c->line_start_mask = CLG_MALLOC("cl.sim.cu_ic.3",
+                                    sizeof(int) * c->line_size);
+    c->line_end_mask = CLG_MALLOC("cl.sim.cu_ic.4",
+                                  sizeof(int) * c->line_size);
     
-
     c->line_size_mask = c->line_size-1;
 
     /* Meaning of line_start_mask/line_end_mask
@@ -1614,7 +1618,7 @@
 {
    int   i1, i2, i3;
    int   i;
-   char *opt = VG_(strdup)(orig_opt);
+   char *opt = VG_(strdup)("cl.sim.po.1", orig_opt);
 
    i = i1 = opt_len;
 
diff --git a/callgrind/threads.c b/callgrind/threads.c
index 95b5908..779fe41 100644
--- a/callgrind/threads.c
+++ b/callgrind/threads.c
@@ -100,7 +100,8 @@
 {
     thread_info* t;
 
-    t = (thread_info*) CLG_MALLOC(sizeof(thread_info));
+    t = (thread_info*) CLG_MALLOC("cl.threads.nt.1",
+                                  sizeof(thread_info));
 
     /* init state */
     CLG_(init_exec_stack)( &(t->states) );
@@ -323,7 +324,8 @@
 static exec_state* new_exec_state(Int sigNum)
 {
     exec_state* es;
-    es = (exec_state*) CLG_MALLOC(sizeof(exec_state));
+    es = (exec_state*) CLG_MALLOC("cl.threads.nes.1",
+                                  sizeof(exec_state));
 
     /* allocate real cost space: needed as incremented by
      * simulation functions */
diff --git a/configure.in b/configure.in
index 19c828f..001c45c 100644
--- a/configure.in
+++ b/configure.in
@@ -1518,6 +1518,9 @@
    exp-omega/Makefile
    exp-omega/tests/Makefile
    exp-omega/docs/Makefile
+   exp-ptrcheck/Makefile
+   exp-ptrcheck/tests/Makefile
+   exp-ptrcheck/docs/Makefile
    drd/Makefile
    drd/docs/Makefile
    drd/scripts/download-and-build-splash2
diff --git a/coregrind/m_commandline.c b/coregrind/m_commandline.c
index c2256cf..7a22c08 100644
--- a/coregrind/m_commandline.c
+++ b/coregrind/m_commandline.c
@@ -67,7 +67,7 @@
    if ( !fd.isError ) {
       size = VG_(fsize)(fd.res);
       if (size > 0) {
-         f_clo = VG_(malloc)(size+1);
+         f_clo = VG_(malloc)("commandline.rdv.1", size+1);
          vg_assert(f_clo);
          n = VG_(read)(fd.res, f_clo, size);
          if (n == -1) n = 0;
@@ -154,17 +154,20 @@
    vg_assert(!already_called);
    already_called = True;
 
-   tmp_xarray = VG_(newXA)( VG_(malloc), VG_(free), sizeof(HChar*) );
+   tmp_xarray = VG_(newXA)( VG_(malloc), "commandline.sua.1",
+                            VG_(free), sizeof(HChar*) );
    vg_assert(tmp_xarray);
 
    vg_assert( ! VG_(args_for_valgrind) );
    VG_(args_for_valgrind)
-      = VG_(newXA)( VG_(malloc), VG_(free), sizeof(HChar*) );
+      = VG_(newXA)( VG_(malloc), "commandline.sua.2",
+                    VG_(free), sizeof(HChar*) );
    vg_assert( VG_(args_for_valgrind) );
 
    vg_assert( ! VG_(args_for_client) );
    VG_(args_for_client)
-      = VG_(newXA)( VG_(malloc), VG_(free), sizeof(HChar*) );
+      = VG_(newXA)( VG_(malloc), "commandline.sua.3",
+                    VG_(free), sizeof(HChar*) );
    vg_assert( VG_(args_for_client) );
 
    /* Collect up the args-for-V. */
@@ -203,7 +206,8 @@
       // put into VG_(args_for_valgrind) and so must persist.
       HChar* home    = VG_(getenv)("HOME");
       HChar* f1_clo  = home ? read_dot_valgrindrc( home ) : NULL;
-      HChar* env_clo = VG_(strdup)( VG_(getenv)(VALGRIND_OPTS) );
+      HChar* env_clo = VG_(strdup)( "commandline.sua.4",
+                                    VG_(getenv)(VALGRIND_OPTS) );
       HChar* f2_clo  = NULL;
 
       // Don't read ./.valgrindrc if "." is the same as "$HOME", else its
diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c
index 064cea5..f740b9d 100644
--- a/coregrind/m_coredump/coredump-elf.c
+++ b/coregrind/m_coredump/coredump-elf.c
@@ -79,7 +79,7 @@
 
    n_starts = 1;
    while (True) {
-      starts = VG_(malloc)( n_starts * sizeof(Addr) );
+      starts = VG_(malloc)( "coredump-elf.gss.1", n_starts * sizeof(Addr) );
       if (starts == NULL)
          break;
       r = VG_(am_get_segment_starts)( starts, n_starts );
@@ -184,7 +184,7 @@
    Int notelen = sizeof(struct note) + 
       VG_ROUNDUP(namelen, 4) + 
       VG_ROUNDUP(datasz, 4);
-   struct note *n = VG_(arena_malloc)(VG_AR_CORE, notelen);
+   struct note *n = VG_(arena_malloc)(VG_AR_CORE, "coredump-elf.an.1", notelen);
 
    VG_(memset)(n, 0, notelen);
 
@@ -349,7 +349,8 @@
    notelist = NULL;
 
    /* Second, work out their layout */
-   phdrs = VG_(arena_malloc)(VG_AR_CORE, sizeof(*phdrs) * num_phdrs);
+   phdrs = VG_(arena_malloc)(VG_AR_CORE, "coredump-elf.mec.1", 
+                             sizeof(*phdrs) * num_phdrs);
 
    for(i = 1; i < VG_N_THREADS; i++) {
       vki_elf_fpregset_t  fpu;
diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c
index d349f3c..9addc39 100644
--- a/coregrind/m_debuginfo/d3basics.c
+++ b/coregrind/m_debuginfo/d3basics.c
@@ -38,10 +38,12 @@
 #include "pub_core_libcassert.h"
 #include "pub_core_libcprint.h"
 #include "pub_core_options.h"
+#include "pub_core_xarray.h"
 
 #include "pub_core_vki.h"       /* VKI_PROT_READ */
 #include "pub_core_aspacemgr.h" /* VG_(is_valid_for_client) */
 
+#include "priv_misc.h"
 #include "priv_d3basics.h"      /* self */
 
 HChar* ML_(pp_DW_children) ( DW_children hashch )
@@ -461,7 +463,6 @@
        && expr[0] == DW_OP_regx) {
       /* JRS: 2008Feb20: I believe the following is correct, but would
          like to see a test case show up before enabling it. */
-      vg_assert(0);
       expr++;
       res.kind = GXR_RegNo;
       res.word = (UWord)read_leb128U( &expr );
@@ -472,7 +473,7 @@
       /*NOTREACHED*/
    }
 
-   /* Evidently this expresion denotes a value, not a register name.
+   /* Evidently this expression denotes a value, not a register name.
       So evaluate it accordingly. */
 
    if (push_initial_zero)
@@ -674,6 +675,148 @@
 }
 
 
+/* Evaluate a very simple Guarded (DWARF3) expression.  The expression
+   is expected to denote a constant, with no reference to any
+   registers nor to any frame base expression.  The expression is
+   expected to have at least one guard.  If there is more than one
+   guard, all the sub-expressions are evaluated and compared.  The
+   address ranges on the guards are ignored.  GXR_Failure is returned
+   in the following circumstances:
+   * no guards
+   * any of the subexpressions require a frame base expression
+   * any of the subexpressions denote a register location
+   * any of the subexpressions do not produce a manifest constant
+   * there's more than one subexpression, all of which successfully
+     evaluate to a constant, but they don't all produce the same constant.
+   On failure, .word points to a static string describing the reason. */
+GXResult ML_(evaluate_trivial_GX)( GExpr* gx, Addr data_bias )
+{
+   GXResult   res;
+   Addr       aMin, aMax;
+   UChar      uc;
+   UShort     nbytes;
+   Word       i, nGuards;
+   MaybeUWord *muw, *muw2;
+
+   HChar*  badness = NULL;
+   UChar*  p       = &gx->payload[0];
+   XArray* results = VG_(newXA)( ML_(dinfo_zalloc), "di.d3basics.etG.1",
+                                 ML_(dinfo_free),
+                                 sizeof(MaybeUWord) );
+
+   uc = *p++; /*biasMe*/
+   vg_assert(uc == 0 || uc == 1);
+   /* in fact it's senseless to evaluate if the guards need biasing.
+      So don't. */
+   vg_assert(uc == 0);
+
+   nGuards = 0;
+   while (True) {
+      MaybeUWord thisResult;
+      uc = *p++;
+      if (uc == 1) /*isEnd*/
+         break;
+      vg_assert(uc == 0);
+      aMin   = * (Addr*)p;   p += sizeof(Addr);
+      aMax   = * (Addr*)p;   p += sizeof(Addr);
+      nbytes = * (UShort*)p; p += sizeof(UShort);
+      nGuards++;
+      if (0) VG_(printf)("           guard %ld: %#lx %#lx\n", 
+                         nGuards, aMin,aMax);
+
+      thisResult.b = False;
+      thisResult.w = 0;
+
+      /* Peer at this particular subexpression, to see if it's
+         obviously a constant. */
+      if (nbytes == 1 + sizeof(Addr) && *p == DW_OP_addr) {
+         thisResult.b = True;
+         thisResult.w = *(Addr*)(p+1) + data_bias;
+      }
+      else if (nbytes == 2 + sizeof(Addr) 
+               && *p == DW_OP_addr
+               && *(p + 1 + sizeof(Addr)) == DW_OP_GNU_push_tls_address) {
+         if (!badness)
+            badness = "trivial GExpr is DW_OP_addr plus trailing junk";
+      }
+      else if (nbytes >= 1 && *p >= DW_OP_reg0 && *p <= DW_OP_reg31) {
+         if (!badness)
+            badness = "trivial GExpr denotes register (1)";
+      }
+      else if (nbytes >= 1 && *p == DW_OP_fbreg) {
+         if (!badness)
+            badness = "trivial GExpr requires fbGX";
+      }
+      else if (nbytes >= 1 && *p >= DW_OP_breg0 && *p <= DW_OP_breg31) {
+         if (!badness)
+            badness = "trivial GExpr requires register value";
+      }
+      else if (nbytes >= 1 && *p == DW_OP_regx) {
+         if (!badness)
+            badness = "trivial GExpr denotes register (2)";
+      }
+      else {
+         VG_(printf)(" ML_(evaluate_trivial_GX): unhandled:\n   ");
+         ML_(pp_GX)( gx );
+         VG_(printf)("\n");
+         vg_assert(0);
+      }
+
+      VG_(addToXA)( results, &thisResult );
+
+      p += (UWord)nbytes;
+   }
+
+   res.kind = GXR_Failure;
+
+   vg_assert(nGuards == VG_(sizeXA)( results ));
+   vg_assert(nGuards >= 0);
+   if (nGuards == 0) {
+      vg_assert(!badness);
+      res.word = (UWord)"trivial GExpr has no guards (!)";
+      VG_(deleteXA)( results );
+      return res;
+   }
+
+   for (i = 0; i < nGuards; i++) {
+      muw = VG_(indexXA)( results, i );
+      if (muw->b == False)
+         break;
+   }
+
+   vg_assert(i >= 0 && i <= nGuards);
+   if (i < nGuards) {
+      /* at least one subexpression failed to produce a manifest constant. */
+      vg_assert(badness);
+      res.word = (UWord)badness;
+      VG_(deleteXA)( results );
+      return res;
+   }
+
+   /* All the subexpressions produced a constant, but did they all produce
+      the same one? */
+   muw = VG_(indexXA)( results, 0 );
+   vg_assert(muw->b == True); /* we just established that all exprs are ok */
+
+   for (i = 1; i < nGuards; i++) {
+      muw2 = VG_(indexXA)( results, i );
+      vg_assert(muw2->b == True);
+      if (muw2->w != muw->w) {
+         res.word = (UWord)"trivial GExpr: subexpressions disagree";
+         VG_(deleteXA)( results );
+         return res;
+      }
+   }
+
+   /* Well, we have success.  All subexpressions evaluated, and 
+      they all agree.  Hurrah. */
+   res.kind = GXR_Value;
+   res.word = muw->w;
+   VG_(deleteXA)( results );
+   return res;
+}
+
+
 void ML_(pp_GXResult) ( GXResult res )
 {
    switch (res.kind) {
@@ -689,6 +832,34 @@
 }
 
 
+void ML_(pp_GX) ( GExpr* gx ) { /* Print gx's payload: guards + expr bytes */
+   Addr   aMin, aMax;
+   UChar  uc;
+   UShort nbytes;
+   UChar* p = &gx->payload[0];
+   uc = *p++; /* leading flag byte: 0 prints as "final", else "Breqd" */
+   VG_(printf)("GX(%s){", uc == 0 ? "final" : "Breqd" );
+   vg_assert(uc == 0 || uc == 1);
+   while (True) {
+      uc = *p++; /* per-guard marker: 1 ends the list, 0 starts a guard */
+      if (uc == 1)
+         break; /*isEnd*/
+      vg_assert(uc == 0);
+      aMin   = * (Addr*)p;  p += sizeof(Addr);
+      aMax   = * (Addr*)p;  p += sizeof(Addr);
+      nbytes = * (UShort*)p; p += sizeof(UShort);
+      VG_(printf)("[%#lx,%#lx]=", aMin, aMax);
+      while (nbytes > 0) { /* dump the nbytes expression bytes as hex */
+         VG_(printf)("%02x", (UInt)*p++);
+         nbytes--;
+      }
+      if (*p == 0) /* next marker is 0, so another guard follows */
+         VG_(printf)(",");
+   }
+   VG_(printf)("}");
+}
+
+
 /*--------------------------------------------------------------------*/
 /*--- end                                               d3basics.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index ce09314..01032e0 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -156,6 +156,11 @@
 /*--- Notification (acquire/discard) helpers               ---*/
 /*------------------------------------------------------------*/
 
+/* Gives out unique abstract handles for allocated DebugInfos.  See
+   comment in priv_storage.h, declaration of struct _DebugInfo, for
+   details. */
+static ULong handle_counter = 1;
+
 /* Allocate and zero out a new DebugInfo record. */
 static 
 DebugInfo* alloc_DebugInfo( const UChar* filename,
@@ -166,9 +171,10 @@
 
    vg_assert(filename);
 
-   di = ML_(dinfo_zalloc)(sizeof(DebugInfo));
-   di->filename  = ML_(dinfo_strdup)(filename);
-   di->memname   = memname ? ML_(dinfo_strdup)(memname)
+   di = ML_(dinfo_zalloc)("di.debuginfo.aDI.1", sizeof(DebugInfo));
+   di->handle    = handle_counter++;
+   di->filename  = ML_(dinfo_strdup)("di.debuginfo.aDI.2", filename);
+   di->memname   = memname ? ML_(dinfo_strdup)("di.debuginfo.aDI.3", memname)
                            : NULL;
 
    /* Everything else -- pointers, sizes, arrays -- is zeroed by calloc.
@@ -194,7 +200,7 @@
 {
    Word i, j, n;
    struct strchunk *chunk, *next;
-   TyAdmin* admin;
+   TyEnt* ent;
    GExpr* gexpr;
 
    vg_assert(di != NULL);
@@ -209,17 +215,18 @@
       ML_(dinfo_free)(chunk);
    }
 
-   /* Delete the two admin lists.  These lists exist purely so that we
-      can visit each object exactly once when we need to delete
-      them. */
-   if (di->admin_tyadmins) {
-      n = VG_(sizeXA)(di->admin_tyadmins);
+   /* Delete the two admin arrays.  These arrays exist primarily so
+      that we can visit each object exactly once when we need to
+      delete them. */
+   if (di->admin_tyents) {
+      n = VG_(sizeXA)(di->admin_tyents);
       for (i = 0; i < n; i++) {
-         admin = (TyAdmin*)VG_(indexXA)(di->admin_tyadmins, i);
-         ML_(delete_payload_of_TyAdmin)(admin);
+         ent = (TyEnt*)VG_(indexXA)(di->admin_tyents, i);
+         /* Dump anything hanging off this ent */
+         ML_(TyEnt__make_EMPTY)(ent);
       }
-      VG_(deleteXA)(di->admin_tyadmins);
-      di->admin_tyadmins = NULL;
+      VG_(deleteXA)(di->admin_tyents);
+      di->admin_tyents = NULL;
    }
 
    if (di->admin_gexprs) {
@@ -490,14 +497,22 @@
    will try load debug info if the mapping at 'a' belongs to Valgrind;
    whereas normally (False) it will not do that.  This allows us to
    carefully control when the thing will read symbols from the
-   Valgrind executable itself. */
+   Valgrind executable itself.
 
-void VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
+   If a call to VG_(di_notify_mmap) causes debug info to be read, then
+   the returned ULong is an abstract handle which can later be used to
+   refer to the debuginfo read as a result of this specific mapping,
+   in later queries to m_debuginfo.  In this case the handle value
+   will be one or above.  If the returned value is zero, no debug info
+   was read. */
+
+ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
 {
    NSegment const * seg;
    HChar*     filename;
    Bool       ok, is_rx_map, is_rw_map;
    DebugInfo* di;
+   ULong      di_handle;
    SysRes     fd;
    Int        nread;
    HChar      buf1k[1024];
@@ -523,12 +538,12 @@
    /* Ignore non-file mappings */
    if ( ! (seg->kind == SkFileC
            || (seg->kind == SkFileV && allow_SkFileV)) )
-      return;
+      return 0;
 
    /* If the file doesn't have a name, we're hosed.  Give up. */
    filename = VG_(am_get_filename)( (NSegment*)seg );
    if (!filename)
-      return;
+      return 0;
 
    if (debug)
       VG_(printf)("di_notify_mmap-2: %s\n", filename);
@@ -550,13 +565,13 @@
          fake_di.filename = filename;
          ML_(symerr)(&fake_di, True, "failed to stat64/stat this file");
       }
-      return;
+      return 0;
    }
 
    /* Finally, the point of all this stattery: if it's not a regular file,
       don't try to read debug info from it. */
    if (! VKI_S_ISREG(statbuf.st_mode))
-      return;
+      return 0;
 
    /* no uses of statbuf below here. */
 
@@ -572,25 +587,25 @@
          fake_di.filename = filename;
          ML_(symerr)(&fake_di, True, "can't open file to inspect ELF header");
       }
-      return;
+      return 0;
    }
    nread = VG_(read)( fd.res, buf1k, sizeof(buf1k) );
    VG_(close)( fd.res );
 
    if (nread == 0)
-      return;
+      return 0;
    if (nread < 0) {
       DebugInfo fake_di;
       VG_(memset)(&fake_di, 0, sizeof(fake_di));
       fake_di.filename = filename;
       ML_(symerr)(&fake_di, True, "can't read file to inspect ELF header");
-      return;
+      return 0;
    }
    vg_assert(nread > 0 && nread <= sizeof(buf1k) );
 
    /* We're only interested in mappings of ELF object files. */
    if (!ML_(is_elf_object_file)( buf1k, (SizeT)nread ))
-      return;
+      return 0;
 
    /* Now we have to guess if this is a text-like mapping, a data-like
       mapping, neither or both.  The rules are:
@@ -645,7 +660,7 @@
 
    /* If it is neither text-ish nor data-ish, we're not interested. */
    if (!(is_rx_map || is_rw_map))
-      return;
+      return 0;
 
    /* See if we have a DebugInfo for this filename.  If not,
       create one. */
@@ -676,48 +691,58 @@
       }
    }
 
-   if (di->have_rx_map && di->have_rw_map && !di->have_dinfo) {
+   /* If we don't have an rx and rw mapping, or if we already have
+      debuginfo for this mapping for whatever reason, go no
+      further. */
+   if ( ! (di->have_rx_map && di->have_rw_map && !di->have_dinfo) )
+      return 0;
 
-      vg_assert(di->filename);
-      TRACE_SYMTAB("\n");
-      TRACE_SYMTAB("------ start ELF OBJECT "
-                   "------------------------------\n");
-      TRACE_SYMTAB("------ name = %s\n", di->filename);
-      TRACE_SYMTAB("\n");
+   /* Ok, so, finally, let's try to read the debuginfo. */
+   vg_assert(di->filename);
+   TRACE_SYMTAB("\n");
+   TRACE_SYMTAB("------ start ELF OBJECT "
+                "------------------------------\n");
+   TRACE_SYMTAB("------ name = %s\n", di->filename);
+   TRACE_SYMTAB("\n");
 
-      /* We're going to read symbols and debug info for the avma
-         ranges [rx_map_avma, +rx_map_size) and [rw_map_avma,
-         +rw_map_size).  First get rid of any other DebugInfos which
-         overlap either of those ranges (to avoid total confusion). */
-      discard_DebugInfos_which_overlap_with( di );
+   /* We're going to read symbols and debug info for the avma
+      ranges [rx_map_avma, +rx_map_size) and [rw_map_avma,
+      +rw_map_size).  First get rid of any other DebugInfos which
+      overlap either of those ranges (to avoid total confusion). */
+   discard_DebugInfos_which_overlap_with( di );
 
-      /* .. and acquire new info. */
-      ok = ML_(read_elf_debug_info)( di );
+   /* .. and acquire new info. */
+   ok = ML_(read_elf_debug_info)( di );
 
-      if (ok) {
-         TRACE_SYMTAB("\n------ Canonicalising the "
-                      "acquired info ------\n");
-         /* prepare read data for use */
-         ML_(canonicaliseTables)( di );
-         /* notify m_redir about it */
-         TRACE_SYMTAB("\n------ Notifying m_redir ------\n");
-         VG_(redir_notify_new_DebugInfo)( di );
-         /* Note that we succeeded */
-         di->have_dinfo = True;
-      } else {
-         TRACE_SYMTAB("\n------ ELF reading failed ------\n");
-         /* Something went wrong (eg. bad ELF file).  Should we delete
-            this DebugInfo?  No - it contains info on the rw/rx
-            mappings, at least. */
-      }
+   if (ok) {
 
-      TRACE_SYMTAB("\n");
-      TRACE_SYMTAB("------ name = %s\n", di->filename);
-      TRACE_SYMTAB("------ end ELF OBJECT "
-                   "------------------------------\n");
-      TRACE_SYMTAB("\n");
+      TRACE_SYMTAB("\n------ Canonicalising the "
+                   "acquired info ------\n");
+      /* prepare read data for use */
+      ML_(canonicaliseTables)( di );
+      /* notify m_redir about it */
+      TRACE_SYMTAB("\n------ Notifying m_redir ------\n");
+      VG_(redir_notify_new_DebugInfo)( di );
+      /* Note that we succeeded */
+      di->have_dinfo = True;
+      tl_assert(di->handle > 0);
+      di_handle = di->handle;
 
+   } else {
+      TRACE_SYMTAB("\n------ ELF reading failed ------\n");
+      /* Something went wrong (eg. bad ELF file).  Should we delete
+         this DebugInfo?  No - it contains info on the rw/rx
+         mappings, at least. */
+      di_handle = 0;
    }
+
+   TRACE_SYMTAB("\n");
+   TRACE_SYMTAB("------ name = %s\n", di->filename);
+   TRACE_SYMTAB("------ end ELF OBJECT "
+                "------------------------------\n");
+   TRACE_SYMTAB("\n");
+
+   return di_handle;
 }
 
 
@@ -760,7 +785,7 @@
    read debug info for it -- or conversely, have recently been dumped,
    in which case the relevant debug info has to be unloaded. */
 
-void VG_(di_aix5_notify_segchange)( 
+ULong VG_(di_aix5_notify_segchange)( 
                Addr   code_start,
                Word   code_len,
                Addr   data_start,
@@ -770,6 +795,8 @@
                Bool   is_mainexe,
                Bool   acquire )
 {
+   ULong hdl = 0;
+
    if (acquire) {
 
       Bool       ok;
@@ -811,6 +838,8 @@
          VG_(redir_notify_new_DebugInfo)( di );
          /* Note that we succeeded */
          di->have_dinfo = True;
+         hdl = di->handle;
+         vg_assert(hdl > 0);
       } else {
          /*  Something went wrong (eg. bad XCOFF file). */
          discard_DebugInfo( di );
@@ -825,6 +854,8 @@
          discard_syms_in_range( code_start, code_len );
 
    }
+
+   return hdl;
 }
         
 
@@ -837,6 +868,19 @@
 /*---                                                      ---*/
 /*------------------------------------------------------------*/
 
+/* Free every DebugInfo on debugInfo_list and leave the list empty. */
+void VG_(di_discard_ALL_debuginfo)( void )
+{
+   DebugInfo *di, *di2;
+   for (di = debugInfo_list; di; di = di2) {
+      di2 = di->next; /* fetch the link before 'di' is freed */
+      VG_(printf)("XXX rm %p\n", di);
+      free_DebugInfo( di );
+   }
+   debugInfo_list = NULL; /* don't leave the head pointing at freed nodes */
+}
+
+
 /*------------------------------------------------------------*/
 /*--- Use of symbol table & location info to create        ---*/
 /*--- plausible-looking stack dumps.                       ---*/
@@ -1613,7 +1657,7 @@
             case CFIR_MEMCFAREL: {                      \
                Addr a = cfa + (Word)_off;               \
                if (a < min_accessible                   \
-                   || a+sizeof(Addr) > max_accessible)  \
+                   || a > max_accessible-sizeof(Addr))  \
                   return False;                         \
                _prev = *(Addr*)a;                       \
                break;                                   \
@@ -1664,6 +1708,7 @@
    regs, which supplies ip,sp,fp values, will be NULL for global
    variables, and non-NULL for local variables. */
 static Bool data_address_is_in_var ( /*OUT*/UWord* offset,
+                                     XArray* /* TyEnt */ tyents,
                                      DiVariable*   var,
                                      RegSummary*   regs,
                                      Addr          data_addr,
@@ -1673,12 +1718,12 @@
    SizeT      var_szB;
    GXResult   res;
    Bool       show = False;
+
    vg_assert(var->name);
-   vg_assert(var->type);
    vg_assert(var->gexpr);
 
    /* Figure out how big the variable is. */
-   muw = ML_(sizeOfType)(var->type);
+   muw = ML_(sizeOfType)(tyents, var->typeR);
    /* if this var has a type whose size is unknown, it should never
       have been added.  ML_(addVar) should have rejected it. */
    vg_assert(muw.b == True);
@@ -1688,7 +1733,7 @@
    if (show) {
       VG_(printf)("VVVV: data_address_%#lx_is_in_var: %s :: ",
                   data_addr, var->name );
-      ML_(pp_Type_C_ishly)( var->type );
+      ML_(pp_TyEnt_C_ishly)( tyents, var->typeR );
       VG_(printf)("\n");
    }
 
@@ -1867,7 +1912,6 @@
    dname1[n_dname-1] = dname2[n_dname-1] = 0;
 }
 
-
 /* Determine if data_addr is a local variable in the frame
    characterised by (ip,sp,fp), and if so write its description into
    dname{1,2}[0..n_dname-1], and return True.  If not, return
@@ -1976,11 +2020,13 @@
          if (debug)
             VG_(printf)("QQQQ:    var:name=%s %#lx-%#lx %#lx\n",
                         var->name,arange->aMin,arange->aMax,ip);
-         if (data_address_is_in_var( &offset, var, &regs, data_addr,
-                                     di->data_bias )) {
+         if (data_address_is_in_var( &offset, di->admin_tyents,
+                                     var, &regs,
+                                     data_addr, di->data_bias )) {
             OffT residual_offset = 0;
             XArray* described = ML_(describe_type)( &residual_offset,
-                                                    var->type, offset );
+                                                    di->admin_tyents, 
+                                                    var->typeR, offset );
             format_message( dname1, dname2, n_dname, 
                             data_addr, var, offset, residual_offset,
                             described, frameNo, tid );
@@ -2023,7 +2069,7 @@
       scope. */
    for (di = debugInfo_list; di != NULL; di = di->next) {
       OSet*        global_scope;
-      Int          gs_size;
+      Word         gs_size;
       Addr         zero;
       DiAddrRange* global_arange;
       Word         i;
@@ -2073,12 +2119,13 @@
             This means, if the evaluation of the location
             expression/list requires a register, we have to let it
             fail. */
-         if (data_address_is_in_var( &offset, var, 
+         if (data_address_is_in_var( &offset, di->admin_tyents, var, 
                                      NULL/* RegSummary* */, 
                                      data_addr, di->data_bias )) {
             OffT residual_offset = 0;
             XArray* described = ML_(describe_type)( &residual_offset,
-                                                    var->type, offset );
+                                                    di->admin_tyents,
+                                                    var->typeR, offset );
             format_message( dname1, dname2, n_dname, 
                             data_addr, var, offset, residual_offset,
                             described, -1/*frameNo*/, tid );
@@ -2197,6 +2244,392 @@
 }
 
 
+//////////////////////////////////////////////////////////////////
+//                                                              //
+// Support for other kinds of queries to the Dwarf3 var info    //
+//                                                              //
+//////////////////////////////////////////////////////////////////
+
+/* Figure out if the variable 'var' has a location that is linearly
+   dependent on a stack pointer value, or a frame pointer value, and
+   if it is, append a StackBlock describing it to 'blocks'.
+   Otherwise ignore it.  If 'arrays_only' is True, also ignore it
+   unless it has an array type.
+
+   'tyents' is the array in which var->typeR is looked up.  'ip' and
+   'data_bias' are passed unchanged to every ML_(evaluate_GX) call
+   made on the variable's location expression.  Nothing is returned;
+   the only lasting effect is the (at most one) entry added to
+   'blocks'. */
+
+static 
+void analyse_deps ( /*MOD*/XArray* /* of StackBlock */ blocks,
+                    XArray* /* TyEnt */ tyents,
+                    Addr ip, Addr data_bias, DiVariable* var,
+                    Bool arrays_only )
+{
+   GXResult   res_sp_6k, res_sp_7k, res_fp_6k, res_fp_7k, res;
+   RegSummary regs;
+   MaybeUWord muw;
+   Bool       isVec, spRel;
+   TyEnt*     ty;
+   StackBlock block;
+   UWord      sp_delta, fp_delta;
+
+   Bool debug = False;
+   if (0&&debug)
+      VG_(printf)("adeps: var %s\n", var->name );
+
+   /* Figure out how big the variable is. */
+   muw = ML_(sizeOfType)(tyents, var->typeR);
+   /* if this var has a type whose size is unknown or zero, it should
+      never have been added.  ML_(addVar) should have rejected it. */
+   vg_assert(muw.b == True);
+   vg_assert(muw.w > 0);
+
+   /* skip if non-array and we're only interested in arrays */
+   ty = ML_(TyEnts__index_by_cuOff)( tyents, NULL, var->typeR );
+   vg_assert(ty);
+   vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty));
+   if (ty->tag == Te_UNKNOWN)
+      return; /* perhaps we should complain in this case? */
+   isVec = ty->tag == Te_TyArray;
+   if (arrays_only && !isVec)
+      return;
+
+   if (0) {ML_(pp_TyEnt_C_ishly)(tyents, var->typeR);
+           VG_(printf)("  %s\n", var->name);}
+
+   /* Do some test evaluations of the variable's location expression,
+      in order to guess whether it is sp-relative, fp-relative, or
+      none.  A crude hack, which can be interpreted roughly as finding
+      the first derivative of the location expression w.r.t. the
+      supplied frame and stack pointer values. */
+   regs.fp   = 0;
+   regs.ip   = ip;
+   regs.sp   = 6 * 1024;
+   res_sp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, data_bias );
+
+   regs.fp   = 0;
+   regs.ip   = ip;
+   regs.sp   = 7 * 1024;
+   res_sp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, data_bias );
+
+   regs.fp   = 6 * 1024;
+   regs.ip   = ip;
+   regs.sp   = 0;
+   res_fp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, data_bias );
+
+   regs.fp   = 7 * 1024;
+   regs.ip   = ip;
+   regs.sp   = 0;
+   res_fp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, data_bias );
+
+   vg_assert(res_sp_6k.kind == res_sp_7k.kind);
+   vg_assert(res_sp_6k.kind == res_fp_6k.kind);
+   vg_assert(res_sp_6k.kind == res_fp_7k.kind);
+
+   /* The trial evaluations did not produce a value, so there is no
+      address to classify. */
+   if (res_sp_6k.kind != GXR_Value)
+      return;
+
+   /* Each register was stepped by 1024, so a delta of 1024 means the
+      address follows that register one-for-one and a delta of zero
+      means it does not depend on it.  Anything else is unexpected. */
+   sp_delta = res_sp_7k.word - res_sp_6k.word;
+   fp_delta = res_fp_7k.word - res_fp_6k.word;
+   vg_assert(sp_delta == 0 || sp_delta == 1024);
+   vg_assert(fp_delta == 0 || fp_delta == 1024);
+
+   /* depends neither on sp nor fp, so it can't be a stack local.
+      Ignore it. */
+   if (sp_delta == 0 && fp_delta == 0)
+      return;
+
+   /* Tracking both registers at once is not a case we know how to
+      describe. */
+   vg_assert(sp_delta == 0 || fp_delta == 0);
+   spRel = sp_delta == 1024;
+
+   /* One more evaluation, this time with sp and fp both zero; the
+      resulting word is recorded as the block's base. */
+   regs.sp = regs.fp = 0;
+   regs.ip = ip;
+   res = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, data_bias );
+   vg_assert(res.kind == GXR_Value);
+   if (debug)
+      VG_(printf)("   %5ld .. %5ld (%s) %s\n",
+                  res.word, res.word + muw.w - 1,
+                  spRel ? "sp" : "FP", var->name);
+
+   block.base  = res.word;
+   block.szB   = muw.w;
+   block.spRel = spRel;
+   block.isVec = isVec;
+   /* name[] is zero-filled and at most sizeof-1 bytes are copied in,
+      so the stored name is always NUL-terminated, truncated if
+      var->name is too long. */
+   VG_(memset)( &block.name[0], 0, sizeof(block.name) );
+   if (var->name)
+      VG_(strncpy)( &block.name[0], var->name, sizeof(block.name)-1 );
+   block.name[ sizeof(block.name)-1 ] = 0;
+   VG_(addToXA)( blocks, &block );
+}
+
+
+/* Get an XArray of StackBlock which describe the stack (auto) blocks
+   for this ip.  The caller is expected to free the XArray at some
+   point.  If 'arrays_only' is True, only array-typed blocks are
+   returned; otherwise blocks of all types are returned.  The array
+   comes back empty if no DebugInfo's text mapping contains 'ip', or
+   if the one that does has no variable info. */
+
+void* /* really, XArray* of StackBlock */
+      VG_(di_get_stack_blocks_at_ip)( Addr ip, Bool arrays_only )
+{
+   /* This is a derivation of consider_vars_in_frame() above. */
+   Word       i;
+   DebugInfo* di;
+   Bool debug = False;
+
+   XArray* res = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dgsbai.1",
+                             ML_(dinfo_free),
+                             sizeof(StackBlock) );
+
+   static UInt n_search = 0;
+   static UInt n_steps = 0;
+   n_search++;
+   if (debug)
+      VG_(printf)("QQQQ: dgsbai: ip %#lx\n", ip);
+   /* first, find the DebugInfo that pertains to 'ip'. */
+   for (di = debugInfo_list; di; di = di->next) {
+      n_steps++;
+      /* text segment missing? unlikely, but handle it .. */
+      if (!di->text_present || di->text_size == 0)
+         continue;
+      /* Ok.  So does this text mapping bracket the ip? */
+      if (di->text_avma <= ip && ip < di->text_avma + di->text_size)
+         break;
+   }
+
+   /* Didn't find it.  Strange -- means ip is a code address outside
+      of any mapped text segment.  Unlikely but not impossible -- app
+      could be generating code to run. */
+   if (!di)
+      return res; /* currently empty */
+
+   if (0 && ((n_search & 0x1) == 0))
+      VG_(printf)("VG_(di_get_stack_blocks_at_ip): %u searches, "
+                  "%u DebugInfos looked at\n", 
+                  n_search, n_steps);
+   /* Start of performance-enhancing hack: whenever the low 16 bits
+      of the call counter are zero (an interval chosen hackily after
+      profiling), move the found DebugInfo one step closer to the
+      start of the list.  This makes future searches cheaper. */
+   if ((n_search & 0xFFFF) == 0) {
+      /* Move di one step closer to the start of the list. */
+      move_DebugInfo_one_step_forward( di );
+   }
+   /* End of performance-enhancing hack. */
+
+   /* any var info at all? */
+   if (!di->varinfo)
+      return res; /* currently empty */
+
+   /* Work through the scopes from most deeply nested outwards,
+      looking for code address ranges that bracket 'ip'.  The
+      variables on each such address range found are in scope right
+      now.  Don't descend to level zero as that is the global
+      scope. */
+
+   /* "for each scope, working outwards ..." */
+   for (i = VG_(sizeXA)(di->varinfo) - 1; i >= 1; i--) {
+      XArray*      vars;
+      Word         j, nVars;
+      DiAddrRange* arange;
+      OSet*        this_scope 
+         = *(OSet**)VG_(indexXA)( di->varinfo, i );
+      if (debug)
+         VG_(printf)("QQQQ:   considering scope %ld\n", (Word)i);
+      if (!this_scope)
+         continue;
+      /* Find the set of variables in this scope that
+         bracket the program counter. */
+      arange = VG_(OSetGen_LookupWithCmp)(
+                  this_scope, &ip, 
+                  ML_(cmp_for_DiAddrRange_range)
+               );
+      if (!arange)
+         continue;
+      /* stay sane */
+      vg_assert(arange->aMin <= arange->aMax);
+      /* It must bracket the ip we asked for, else
+         ML_(cmp_for_DiAddrRange_range) is somehow broken. */
+      vg_assert(arange->aMin <= ip && ip <= arange->aMax);
+      /* It must have an attached XArray of DiVariables. */
+      vars = arange->vars;
+      vg_assert(vars);
+      /* 'vars' is not modified below, so read its size just once. */
+      nVars = VG_(sizeXA)( vars );
+      /* But it mustn't cover the entire address range.  We only
+         expect that to happen for the global scope (level 0), which
+         we're not looking at here.  Except, it may cover the entire
+         address range, but in that case the vars array must be
+         empty. */
+      vg_assert(! (arange->aMin == (Addr)0
+                   && arange->aMax == ~(Addr)0
+                   && nVars > 0) );
+      for (j = 0; j < nVars; j++) {
+         DiVariable* var = (DiVariable*)VG_(indexXA)( vars, j );
+         if (debug)
+            VG_(printf)("QQQQ:    var:name=%s %#lx-%#lx %#lx\n", 
+                        var->name,arange->aMin,arange->aMax,ip);
+         analyse_deps( res, di->admin_tyents, ip,
+                       di->data_bias, var, arrays_only );
+      }
+   }
+
+   return res;
+}
+
+
+/* Get an array of GlobalBlock which describe the global blocks owned
+   by the shared object characterised by the given di_handle.  Asserts
+   if the handle is invalid.  The caller is responsible for freeing
+   the array at some point.  If 'arrays_only' is True, only
+   array-typed blocks are returned; otherwise blocks of all types are
+   returned. */
+
+void* /* really, XArray* of GlobalBlock */
+      VG_(di_get_global_blocks_from_dihandle) ( ULong di_handle,
+                                                Bool  arrays_only )
+{
+   /* This is a derivation of consider_vars_in_frame() above. */
+
+   DebugInfo* di;
+   XArray* gvars; /* XArray* of GlobalBlock */
+   Word nScopes, scopeIx;
+
+   /* The first thing to do is find the DebugInfo that
+      pertains to 'di_handle'. */
+   vg_assert(di_handle > 0);
+   for (di = debugInfo_list; di; di = di->next) {
+      if (di->handle == di_handle)
+         break;
+   }
+
+   /* If this fails, we were unable to find any DebugInfo with the
+      given handle.  This is considered an error on the part of the
+      caller. */
+   vg_assert(di != NULL);
+
+   /* we'll put the collected variables in here. */
+   gvars = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dggbfd.1",
+                       ML_(dinfo_free), sizeof(GlobalBlock) );
+   vg_assert(gvars);
+
+   /* any var info at all? */
+   if (!di->varinfo)
+      return gvars;
+
+   /* we'll iterate over all the variables we can find, even if
+      it seems senseless to visit stack-allocated variables */
+   /* Iterate over all scopes */
+   nScopes = VG_(sizeXA)( di->varinfo );
+   for (scopeIx = 0; scopeIx < nScopes; scopeIx++) {
+
+      /* Iterate over each (code) address range at the current scope */
+      DiAddrRange* range;
+      OSet* /* of DiAddrRange */ scope
+         = *(OSet**)VG_(indexXA)( di->varinfo, scopeIx );
+      vg_assert(scope);
+      VG_(OSetGen_ResetIter)(scope);
+      while ( (range = VG_(OSetGen_Next)(scope)) ) {
+
+         /* Iterate over each variable in the current address range */
+         Word nVars, varIx;
+         vg_assert(range->vars);
+         nVars = VG_(sizeXA)( range->vars );
+         for (varIx = 0; varIx < nVars; varIx++) {
+
+            Bool        isVec;
+            GXResult    res;
+            MaybeUWord  muw;
+            GlobalBlock gb;
+            TyEnt*      ty;
+            DiVariable* var = VG_(indexXA)( range->vars, varIx );
+            vg_assert(var->name);
+            if (0) VG_(printf)("at depth %ld var %s ", scopeIx, var->name );
+
+            /* Now figure out if this variable has a constant address
+               (that is, independent of FP, SP, phase of moon, etc),
+               and if so, what the address is.  Any variable with a
+               constant address is deemed to be a global so we collect
+               it. */
+            if (0) { VG_(printf)("EVAL: "); ML_(pp_GX)(var->gexpr);
+                     VG_(printf)("\n"); }
+            res = ML_(evaluate_trivial_GX)( var->gexpr, di->data_bias );
+
+            /* Not a constant address => not interesting */
+            if (res.kind != GXR_Value) {
+               if (0) VG_(printf)("FAIL\n");
+               continue;
+            }
+
+            /* Ok, it's a constant address.  See if we want to collect
+               it. */
+            if (0) VG_(printf)("%#lx\n", res.word);
+
+            /* Figure out how big the variable is. */
+            muw = ML_(sizeOfType)(di->admin_tyents, var->typeR);
+
+            /* if this var has a type whose size is unknown or zero,
+               it should never have been added.  ML_(addVar) should
+               have rejected it. */
+            vg_assert(muw.b == True);
+            vg_assert(muw.w > 0);
+
+            /* skip if non-array and we're only interested in
+               arrays */
+            ty = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL,
+                                              var->typeR );
+            vg_assert(ty);
+            vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty));
+            if (ty->tag == Te_UNKNOWN)
+               continue; /* perhaps we should complain in this case? */
+
+            isVec = ty->tag == Te_TyArray;
+            if (arrays_only && !isVec) continue;
+
+            /* Ok, so collect it! */
+            vg_assert(di->soname);
+            if (0) VG_(printf)("XXXX %s %s %d\n", var->name,
+                                var->fileName?(HChar*)var->fileName
+                                             :"??",var->lineNo);
+            VG_(memset)(&gb, 0, sizeof(gb));
+            gb.addr  = res.word;
+            gb.szB   = muw.w;
+            gb.isVec = isVec;
+            /* gb is all zeroes; copying at most size-1 keeps the NUL */
+            VG_(strncpy)(&gb.name[0], var->name, sizeof(gb.name)-1);
+            VG_(strncpy)(&gb.soname[0], di->soname, sizeof(gb.soname)-1);
+            vg_assert(gb.name[ sizeof(gb.name)-1 ] == 0);
+            vg_assert(gb.soname[ sizeof(gb.soname)-1 ] == 0);
+
+            VG_(addToXA)( gvars, &gb );
+
+         } /* for (varIx = 0; varIx < nVars; varIx++) */
+
+      } /* while ( (range = VG_(OSetGen_Next)(scope)) ) */
+
+   } /* for (scopeIx = 0; scopeIx < nScopes; scopeIx++) */
+
+   return gvars;
+}
+
+
+
 /*------------------------------------------------------------*/
 /*--- DebugInfo accessor functions                         ---*/
 /*------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/misc.c b/coregrind/m_debuginfo/misc.c
index cd31ae9..85b411d 100644
--- a/coregrind/m_debuginfo/misc.c
+++ b/coregrind/m_debuginfo/misc.c
@@ -42,10 +42,10 @@
 #include "priv_misc.h"            /* self */
 
 
-void* ML_(dinfo_zalloc) ( SizeT szB ) {
+void* ML_(dinfo_zalloc) ( HChar* cc, SizeT szB ) {
    void* v;
    vg_assert(szB > 0);
-   v = VG_(arena_malloc)( VG_AR_DINFO, szB );
+   v = VG_(arena_malloc)( VG_AR_DINFO, cc, szB );
    vg_assert(v);
    VG_(memset)(v, 0, szB);
    return v;
@@ -55,8 +55,15 @@
    VG_(arena_free)( VG_AR_DINFO, v );
 }
 
-UChar* ML_(dinfo_strdup) ( const UChar* str ) {
-   return VG_(arena_strdup)( VG_AR_DINFO, str );
+UChar* ML_(dinfo_strdup) ( HChar* cc, const UChar* str ) {
+   return VG_(arena_strdup)( VG_AR_DINFO, cc, str );
+}
+
+UChar* ML_(dinfo_memdup)( HChar* cc, UChar* str, SizeT nStr ) {
+   UChar* dst = VG_(arena_malloc)( VG_AR_DINFO, cc, nStr );
+   vg_assert(dst); /* core-side assert, matching ML_(dinfo_zalloc) */
+   VG_(memcpy)(dst, str, nStr); /* raw byte copy: no NUL is appended */
+   return dst;
+}
 
 
diff --git a/coregrind/m_debuginfo/priv_d3basics.h b/coregrind/m_debuginfo/priv_d3basics.h
index 3d3cb48..1113bf0 100644
--- a/coregrind/m_debuginfo/priv_d3basics.h
+++ b/coregrind/m_debuginfo/priv_d3basics.h
@@ -635,6 +635,15 @@
                                      Addr data_bias,
                                      Bool push_initial_zero );
 
+/* Evaluate a very simple Guarded (DWARF3) expression.  The expression
+   is expected to denote a constant, with no reference to any
+   registers nor to any frame base expression.  GXR_Failure is
+   returned if there is more than one guard, or none, a register
+   location is denoted, a frame base expression is required, or the
+   expression is not manifestly a constant.  The range of addresses
+   covered by the guard is also ignored. */
+GXResult ML_(evaluate_trivial_GX)( GExpr* gx, Addr data_bias );
+
 #endif /* ndef __PRIV_D3BASICS_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/priv_misc.h b/coregrind/m_debuginfo/priv_misc.h
index a9c919e..caf6db4 100644
--- a/coregrind/m_debuginfo/priv_misc.h
+++ b/coregrind/m_debuginfo/priv_misc.h
@@ -37,10 +37,15 @@
 #define __PRIV_MISC_H
 
 
-/* Allocate(zeroed), free, strdup, all in VG_AR_DINFO. */
-void*  ML_(dinfo_zalloc)( SizeT szB );
+/* Allocate(zeroed), free, strdup, memdup, all in VG_AR_DINFO. */
+void*  ML_(dinfo_zalloc)( HChar* cc, SizeT szB );
 void   ML_(dinfo_free)( void* v );
-UChar* ML_(dinfo_strdup)( const UChar* str );
+UChar* ML_(dinfo_strdup)( HChar* cc, const UChar* str );
+UChar* ML_(dinfo_memdup)( HChar* cc, UChar* str, SizeT nStr );
+
+/* A handy type, a la Haskell's Maybe type: an optional UWord.  Users
+   in m_debuginfo check that .b is True before relying on .w. */
+typedef struct { UWord w; Bool b; } MaybeUWord;
 
 
 #endif /* ndef __PRIV_MISC_H */
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index 04799a6..fedd90c 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -221,7 +221,7 @@
 typedef
    struct {
       UChar* name;  /* in DebugInfo.strchunks */
-      Type*  type;  /* on DebugInfo.admin list */
+      UWord  typeR; /* a cuOff */
       GExpr* gexpr; /* on DebugInfo.gexprs list */
       GExpr* fbGX;  /* SHARED. */
       UChar* fileName; /* where declared; may be NULL. in
@@ -248,6 +248,16 @@
    struct _DebugInfo* next;   /* list of DebugInfos */
    Bool               mark;   /* marked for deletion? */
 
+   /* An abstract handle, which can be used by entities outside of
+      m_debuginfo to (in an abstract datatype sense) refer to this
+      struct _DebugInfo.  A .handle of zero is invalid; valid handles
+      are 1 and above.  The same handle is never issued twice (in any
+      given run of Valgrind), so a handle becomes invalid when the
+      associated struct _DebugInfo is discarded, and remains invalid
+      forever thereafter.  The .handle field is set as soon as this
+      structure is allocated. */
+   ULong handle;
+
    /* Used for debugging only - indicate what stuff to dump whilst
       reading stuff into the seginfo.  Are computed as early in the
       lifetime of the DebugInfo as possible -- at the point when it is
@@ -399,16 +409,18 @@
    */
    XArray* /* of OSet of DiAddrRange */varinfo;
 
-   /* These are lists of the relevant typed objects, held here
-      expressly for the purposes of visiting each object exactly once
+   /* These are arrays of the relevant typed objects, held here
+      partially for the purposes of visiting each object exactly once
       when we need to delete them. */
 
-   /* An array of TyAdmin structs, and the payloads that they refer
-      to. */
-   XArray* /* of TyAdmin */ admin_tyadmins;
+   /* An array of TyEnts.  These are needed to make sense of any types
+      in the .varinfo.  Also, when deleting this DebugInfo, we must
+      first traverse this array and throw away malloc'd stuff hanging
+      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
+   XArray* /* of TyEnt */ admin_tyents;
 
-   /* A list of guarded DWARF3 expressions. */
-   GExpr*   admin_gexprs;
+   /* An array of guarded DWARF3 expressions. */
+   XArray* admin_gexprs;
 };
 
 /* --------------------- functions --------------------- */
@@ -437,7 +449,7 @@
                          Addr   aMin,
                          Addr   aMax,
                          UChar* name,
-                         Type*  type,
+                         UWord  typeR, /* a cuOff */
                          GExpr* gexpr,
                          GExpr* fbGX, /* SHARED. */
                          UChar* fileName, /* where decl'd - may be NULL */
diff --git a/coregrind/m_debuginfo/priv_tytypes.h b/coregrind/m_debuginfo/priv_tytypes.h
index dcc6f40..598ead6 100644
--- a/coregrind/m_debuginfo/priv_tytypes.h
+++ b/coregrind/m_debuginfo/priv_tytypes.h
@@ -36,134 +36,174 @@
 #ifndef __PRIV_TYTYPES_H
 #define __PRIV_TYTYPES_H
 
-typedef  struct _TyAdmin   TyAdmin;
-typedef  struct _TyAtom    TyAtom;
-typedef  struct _TyField   TyField;
-typedef  struct _TyBounds  TyBounds;
-typedef  struct _D3Expr    D3Expr;
-typedef  struct _Type      Type;
-
-#define TyBounds_MAGIC 0x0d573990UL
-
 typedef
-   enum { TyA_Atom=10, TyA_Field, TyA_Bounds, TyA_Expr, TyA_Type } 
-   TyAdminTag;
+   enum {
+      Te_EMPTY=10, /* empty (contains no info) */
+      Te_INDIR,    /* indirection to some other TyEnt */
+      Te_UNKNOWN,  /* denotes an unknown type/field/whatever */
+      Te_Atom,     /* name & 64-bit const, iow, enumeration member */
+      Te_Field,    /* struct/class field defn */
+      Te_Bound,    /* array bounds indication, for one dimension */
+      Te_TyBase,   /* base type */
+      Te_TyPorR,   /* pointer or reference type */
+      Te_TyTyDef,  /* a renaming of some other type */
+      Te_TyStOrUn, /* structure or union type */
+      Te_TyEnum,   /* an enum type */
+      Te_TyArray,  /* an array type */
+      Te_TyFn,     /* function type */
+      Te_TyQual,   /* qualified type */
+      Te_TyVoid    /* void type */
+   }
+   TyEntTag;
 
-struct _TyAdmin {
-   UWord      cuOff;
-   void*      payload;
-   TyAdminTag tag;
-};
+/* Fields ending in "R" are references to other TyEnts.  Fields ending
+   in "Rs" are XArray*s of references to other TyEnts. */
+typedef
+   struct {
+      UWord    cuOff;
+      TyEntTag tag;
+      union {
+         struct {
+         } EMPTY;
+         struct {
+            UWord indR;
+         } INDIR;
+         struct {
+         } UNKNOWN;
+         struct {
+            UChar* name; /* in mallocville */
+            Long   value;
+         } Atom;
+         struct {
+            UChar* name;  /* in mallocville */
+            UWord  typeR; /* should be Te_TyXXXX */
+            UChar* loc;   /* location expr, in mallocville */
+            UWord  nLoc;  /* number of bytes in .loc */
+            Bool   isStruct;
+         } Field;
+         struct {
+            Bool knownL;
+            Bool knownU;
+            Long boundL;
+            Long boundU;
+         } Bound;
+         struct {
+            UChar* name; /* in mallocville */
+            Int    szB;
+            UChar  enc; /* S:signed U:unsigned F:floating C:complex float */
+         } TyBase;
+         struct {
+            Int   szB;
+            UWord typeR;
+            Bool  isPtr;
+         } TyPorR;
+         struct {
+            UChar* name;  /* in mallocville */
+            UWord  typeR; /* MAY BE D3_INVALID_CUOFF, denoting unknown */
+         } TyTyDef;
+         struct {
+            UChar*  name; /* in mallocville */
+            UWord   szB;
+            XArray* /* of UWord */ fieldRs;
+            Bool    complete;
+            Bool    isStruct;
+         } TyStOrUn;
+         struct {
+            UChar*  name; /* in mallocville */
+            Int     szB;
+            XArray* /* of UWord */ atomRs;
+         } TyEnum;
+         struct {
+            UWord   typeR;
+            XArray* /* of UWord */ boundRs;
+         } TyArray;
+         struct {
+         } TyFn;
+         struct {
+            UChar qual; /* C:const V:volatile */
+            UWord typeR;
+         } TyQual;
+         struct {
+            Bool isFake; /* True == introduced by the reader */
+         } TyVoid;
+      } Te;
+   }
+   TyEnt;
 
-/* an enumeration value */
-struct _TyAtom {
-   UChar* name; /* in DebugInfo.strchunks */
-   Long   value;
-};
+/* Does this TyEnt denote a type, as opposed to some other kind of
+   thing? */
+Bool ML_(TyEnt__is_type)( TyEnt* );
 
-struct _TyField {
-   UChar*  name; /* in DebugInfo.strchunks */
-   Type*   typeR;
-   D3Expr* loc;
-   Bool    isStruct;
-};
+/* Print a TyEnt, debug-style. */
+void ML_(pp_TyEnt)( TyEnt* );
 
-struct _TyBounds {
-   UInt magic;
-   Bool knownL;
-   Bool knownU;
-   Long boundL;
-   Long boundU;
-};
+/* Print a whole XArray of TyEnts, debug-style */
+void ML_(pp_TyEnts)( XArray* tyents, HChar* who );
 
-struct _D3Expr {
-   UChar* bytes; /* in DebugInfo.strchunks */
-   UWord  nbytes;
-};
+/* Print a TyEnt, C style, chasing stuff as necessary. */
+void ML_(pp_TyEnt_C_ishly)( XArray* /* of TyEnt */ tyents,
+                            UWord cuOff );
 
-struct _Type {
-   enum { Ty_Base=30, Ty_PorR, Ty_TyDef, Ty_StOrUn, 
-          Ty_Enum, Ty_Array, Ty_Fn, Ty_Qual, Ty_Void } tag;
-   union {
-      struct {
-         UChar* name; /* in DebugInfo.strchunks */
-         Int    szB;
-         UChar  enc; /* S:signed U:unsigned F:floating C:complex float */
-      } Base;
-      struct {
-         Int   szB;
-         Type* typeR;
-         Bool  isPtr;
-      } PorR;
-      struct {
-         UChar* name;  /* in DebugInfo.strchunks */
-         Type*  typeR; /* MAY BE NULL, denoting unknown */
-      } TyDef;
-      struct {
-         UChar*  name; /* in DebugInfo.strchunks */
-         UWord   szB;
-         XArray* /* of TyField* */ fields;
-         Bool    complete;
-         Bool    isStruct;
-      } StOrUn;
-      struct {
-         UChar*  name; /* in DebugInfo.strchunks */
-         Int     szB;
-         XArray* /* of TyAtom* */ atomRs;
-      } Enum;
-      struct {
-         Type*   typeR;
-         XArray* /* of TyBounds* */ bounds;
-      } Array;
-      struct {
-      } Fn;
-      struct {
-         UChar qual; /* C:const V:volatile */
-         Type* typeR;
-      } Qual;
-      struct {
-         Bool isFake; /* True == introduced by the reader */
-      } Void;
-   } Ty;
-};
+/* Generates a total ordering on TyEnts based only on their .cuOff
+   fields. */
+Word ML_(TyEnt__cmp_by_cuOff_only) ( TyEnt* te1, TyEnt* te2 );
 
-TyAdmin*  ML_(new_TyAdmin)  ( UWord cuOff );
-TyAtom*   ML_(new_TyAtom)   ( UChar* name, Long value );
-TyField*  ML_(new_TyField)  ( UChar* name, Type* typeR, D3Expr* loc );
-TyBounds* ML_(new_TyBounds) ( void );
-Type*     ML_(new_Type)     ( void );
-D3Expr*   ML_(new_D3Expr)   ( UChar* bytes, UWord nbytes );
+/* Generates a total ordering on TyEnts based on everything except
+   their .cuOff fields. */
+Word ML_(TyEnt__cmp_by_all_except_cuOff) ( TyEnt* te1, TyEnt* te2 );
 
-/* Delete the payload attached to this TyAdmin, but not the TyAdmin
-   itself. */
-void ML_(delete_payload_of_TyAdmin) ( TyAdmin* );
+/* Free up all directly or indirectly heap-allocated stuff attached to
+   this TyEnt, and set its tag to Te_EMPTY.  The .cuOff field is
+   unchanged. */
+void ML_(TyEnt__make_EMPTY) ( TyEnt* te );
 
-void ML_(pp_TyAdmin)  ( TyAdmin* admin );
-void ML_(pp_TyAtom)   ( TyAtom* atom );
-void ML_(pp_TyField)  ( TyField* field );
-void ML_(pp_TyBounds) ( TyBounds* bounds );
-void ML_(pp_Type)     ( Type* ty );
-void ML_(pp_D3Expr)   ( D3Expr* expr );
+/* How big is this type?  If .b in the returned struct is False, the
+   size is unknown. */
 
-/* NOTE: this assumes that the types have all been 'resolved' (that
-   is, inter-type references expressed as .debug_info offsets have
-   been converted into pointers) */
-void ML_(pp_Type_C_ishly) ( Type* ty );
-
-/* How big is this type?  (post-resolved only)  If .b in the
-   returned struct is False, the size is unknown. */
-/* FIXME: check all pointers before dereferencing */
-
-typedef struct { UWord w; Bool b; } MaybeUWord;
-
-MaybeUWord ML_(sizeOfType)( Type* ty );
+MaybeUWord ML_(sizeOfType)( XArray* /* of TyEnt */ tyents,
+                            UWord cuOff );
 
 /* Describe where in the type 'offset' falls.  Caller must
    deallocate the resulting XArray. */
 XArray* /*UChar*/ ML_(describe_type)( /*OUT*/OffT* residual_offset,
-                                      Type* ty, OffT offset );
+                                      XArray* /* of TyEnt */ tyents,
+                                      UWord ty_cuOff, 
+                                      OffT offset );
 
 
+/* A fast-lookup cache for ML_(TyEnts__index_by_cuOff).  Nothing
+   particularly surprising here; it's 2-way set associative, with some
+   number of sets, which doesn't have to be a power of 2.  In
+   order to have a way to indicate an invalid entry, we set the second
+   value of the pair to NULL, and keep checking for it, since
+   unfortunately there's no obvious cuOff number that we could put in
+   the first word of the pair that could indicate an invalid entry.
+
+   4096 arrived at as the best value for an E6600 loading Qt-4.4.1
+   Designer and all associated libraries, compiled by gcc-4.3.1, 
+   -g -O, 64-bit, which is at least a moderately good stress test,
+   with the largest library being about 150MB.*/
+
+#define N_TYENT_INDEX_CACHE 4096
+
+typedef
+   struct {
+      struct { UWord cuOff0; TyEnt* ent0; 
+               UWord cuOff1; TyEnt* ent1; }
+         ce[N_TYENT_INDEX_CACHE];
+   }
+   TyEntIndexCache;
+
+void ML_(TyEntIndexCache__invalidate) ( TyEntIndexCache* cache );
+
+/* 'ents' is an XArray of TyEnts, sorted by their .cuOff fields.  Find
+   the entry which has .cuOff field as specified.  Returns NULL if not
+   found.  Asserts if more than one entry has the specified .cuOff
+   value. */
+TyEnt* ML_(TyEnts__index_by_cuOff) ( XArray* /* of TyEnt */ ents,
+                                     TyEntIndexCache* cache,
+                                     UWord cuOff_to_find );
+
 #endif /* ndef __PRIV_TYTYPES_H */
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c
index 34baf86..de050c7 100644
--- a/coregrind/m_debuginfo/readdwarf.c
+++ b/coregrind/m_debuginfo/readdwarf.c
@@ -99,7 +99,7 @@
       vg_assert( (wa->tab_size == 0 && wa->tab == NULL)
                  || (wa->tab_size != 0 && wa->tab != NULL) );
       new_size = wa->tab_size == 0 ? 8 : 2 * wa->tab_size;
-      new_tab  = ML_(dinfo_zalloc)(new_size * sizeof(Word));
+      new_tab  = ML_(dinfo_zalloc)("di.aWA.1", new_size * sizeof(Word));
       vg_assert(new_tab != NULL);
       for (i = 0; i < wa->tab_used; i++)
          new_tab[i] = wa->tab[i];
@@ -2040,7 +2040,7 @@
       src = ctx->exprs;
       dst = debuginfo->cfsi_exprs;
       if (src && (VG_(sizeXA)(src) > 0) && (!dst)) {
-         dst = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+         dst = VG_(newXA)( ML_(dinfo_zalloc), "di.ccCt.1", ML_(dinfo_free),
                            sizeof(CfiExpr) );
          vg_assert(dst);
          debuginfo->cfsi_exprs = dst;
@@ -2083,6 +2083,7 @@
          dst = debuginfo->cfsi_exprs;                         \
          if (src && (VG_(sizeXA)(src) > 0) && (!dst)) {       \
             dst = VG_(newXA)( ML_(dinfo_zalloc),              \
+                              "di.ccCt.2",                    \
                               ML_(dinfo_free),                \
                               sizeof(CfiExpr) );              \
             vg_assert(dst);                                   \
@@ -3739,7 +3740,8 @@
          ctx.data_a_f = the_CIEs[cie].data_a_f;
          ctx.initloc  = fde_initloc;
          ctx.ra_reg   = the_CIEs[cie].ra_reg;
-         ctx.exprs    = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 
+         ctx.exprs    = VG_(newXA)( ML_(dinfo_zalloc), "di.rcid.1",
+                                    ML_(dinfo_free), 
                                     sizeof(CfiExpr) );
          vg_assert(ctx.exprs);
 
diff --git a/coregrind/m_debuginfo/readdwarf3.c b/coregrind/m_debuginfo/readdwarf3.c
index da4e8bc..84248b2 100644
--- a/coregrind/m_debuginfo/readdwarf3.c
+++ b/coregrind/m_debuginfo/readdwarf3.c
@@ -57,6 +57,11 @@
    merely zero when not explicitly stated.  So we too have to make
    that assumption.
 
+   POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
+   unitary_range_list() bias the resulting range list in the same way
+   that its more general cousin, get_range_list(), does?  I don't
+   know.
+
    TODO, 2008 Feb 17:
 
    get rid of cu_svma_known and document the assumed-zero svma hack.
@@ -93,15 +98,19 @@
 
    POTENTIAL PERFORMANCE IMPROVEMENTS:
 
-   The number of type entities that end up in the list of TyAdmins
-   rapidly becomes huge (eg, for libQtGui.so.4.3.2 (amd64-linux, size
-   80729047 bytes), there are 786860 entries in the list).  Mostly
-   this seems to be caused by g++ adding type DIEs for all the basic
-   types once for each source file contributing to the compilation
-   unit, and for a large library they add up quickly.  That causes
-   both a lot of work for this reader module, and also wastes vast
-   amounts of memory storing this duplicated information.  We could
-   surely do a lot better here.
+   Currently, duplicate removal and all other queries for the type
+   entities array are done using cuOffset-based pointing, which
+   involves a binary search (VG_(lookupXA)) for each access.  This is
+   wildly inefficient, although simple.  It would be better to
+   translate all the cuOffset-based references (iow, all the "R" and
+   "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
+   'tyents' right at the start of dedup_types(), and use direct
+   indexing (VG_(indexXA)) wherever possible after that.
+
+   cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
+   VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
+   points, and possibly also make an _UNCHECKED version which skips
+   the range checks in performance-critical situations such as this.
 
    Handle interaction between read_DIE and parse_{var,type}_DIE
    better.  Currently read_DIE reads the entire DIE just to find where
@@ -129,6 +138,7 @@
 #include "pub_core_libcprint.h"
 #include "pub_core_options.h"
 #include "pub_core_xarray.h"
+#include "pub_core_wordfm.h"
 #include "priv_misc.h"             /* dinfo_zalloc/free */
 #include "priv_tytypes.h"
 #include "priv_d3basics.h"
@@ -145,8 +155,8 @@
 #define TRACE_D3(format, args...) \
    if (td3) { VG_(printf)(format, ## args); }
 
-#define D3_INVALID_CUOFF  ((void*)(-1UL))
-#define D3_FAKEVOID_CUOFF ((void*)(-2UL))
+#define D3_INVALID_CUOFF  ((UWord)(-1UL))
+#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 
 typedef
    struct {
@@ -451,32 +461,9 @@
    location list.  Zero length ranges, with aMax == aMin-1, are not
    allowed.
 */
-void ML_(pp_GX) ( GExpr* gx ) {
-   Addr   aMin, aMax;
-   UChar  uc;
-   UShort nbytes;
-   UChar* p = &gx->payload[0];
-   uc = *p++;
-   VG_(printf)("GX(%s){", uc == 0 ? "final" : "Breqd" );
-   vg_assert(uc == 0 || uc == 1);
-   while (True) {
-      uc = *p++;
-      if (uc == 1)
-         break; /*isEnd*/
-      vg_assert(uc == 0);
-      aMin   = * (Addr*)p;  p += sizeof(Addr);
-      aMax   = * (Addr*)p;  p += sizeof(Addr);
-      nbytes = * (UShort*)p; p += sizeof(UShort);
-      VG_(printf)("[%#lx,%#lx]=", aMin, aMax);
-      while (nbytes > 0) {
-         VG_(printf)("%02x", (UInt)*p++);
-         nbytes--;
-      }
-      if (*p == 0)
-         VG_(printf)(",");
-   }
-   VG_(printf)("}");
-}
+/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
+   it more logically belongs. */
+
 
 /* "Comment_Regarding_DWARF3_Text_Biasing" (is referred to elsewhere)
     -----------------------------------------------------------------
@@ -613,7 +600,8 @@
         + sizeof(UShort) /*nbytes*/    + nbytes
         + sizeof(UChar); /*isEnd*/
 
-   gx = ML_(dinfo_zalloc)( sizeof(GExpr) + bytesReqd );
+   gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1", 
+                           sizeof(GExpr) + bytesReqd );
    vg_assert(gx);
 
    p = pstart = &gx->payload[0];
@@ -658,7 +646,8 @@
             debug_loc_offset, get_address_of_Cursor( &loc ) );
 
    /* Who frees this xa?  It is freed before this fn exits. */
-   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1", 
+                    ML_(dinfo_free),
                     sizeof(UChar) );
 
    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
@@ -728,7 +717,7 @@
    nbytes = VG_(sizeXA)( xa );
    vg_assert(nbytes >= 1);
 
-   gx = ML_(dinfo_zalloc)( sizeof(GExpr) + nbytes );
+   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
    vg_assert(gx);
    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
    vg_assert( &gx->payload[nbytes] 
@@ -756,24 +745,49 @@
    AddrRange;
 
 
+/* Generate an arbitrary structural total ordering on XArray* of
+   AddrRange: by length first, then element-wise on (aMin, aMax). */
+static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
+{
+   Word n1, n2, i;
+   vg_assert(rngs1 && rngs2); /* core code: vg_assert, not tl_assert */
+   n1 = VG_(sizeXA)( rngs1 );
+   n2 = VG_(sizeXA)( rngs2 );
+   if (n1 < n2) return -1;
+   if (n1 > n2) return 1;
+   for (i = 0; i < n1; i++) {
+      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
+      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
+      if (rng1->aMin < rng2->aMin) return -1;
+      if (rng1->aMin > rng2->aMin) return 1;
+      if (rng1->aMax < rng2->aMax) return -1;
+      if (rng1->aMax > rng2->aMax) return 1;
+   }
+   return 0;
+}
+
+
 __attribute__((noinline))
 static XArray* /* of AddrRange */ empty_range_list ( void )
 {
    XArray* xa; /* XArray of AddrRange */
    /* Who frees this xa?  varstack_preen() does. */
-   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
+                    ML_(dinfo_free),
                     sizeof(AddrRange) );
    return xa;
 }
 
 
+__attribute__((noinline))
 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 {
    XArray*   xa;
    AddrRange pair;
    vg_assert(aMin <= aMax);
    /* Who frees this xa?  varstack_preen() does. */
-   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+   xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
+                    ML_(dinfo_free),
                     sizeof(AddrRange) );
    pair.aMin = aMin;
    pair.aMax = aMax;
@@ -808,7 +822,7 @@
    set_position_of_Cursor( &ranges, debug_ranges_offset );
 
    /* Who frees this xa?  varstack_preen() does. */
-   xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
                     sizeof(AddrRange) );
    base = 0;
    while (True) {
@@ -1143,10 +1157,14 @@
       UWord   nRanges;
       Addr    rngOneMin;
       Addr    rngOneMax;
-      XArray* rngMany; /* of AddrRange.  UNIQUE PTR in AR_DINFO. */
+      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
+      /* Do not free .rngMany, since many TempVars will have the same
+         value.  Instead the associated storage is to be freed by
+         deleting 'rangestree', which stores a single copy of each
+         range. */
       /* --- */
       Int     level;
-      Type*   typeR;
+      UWord   typeR; /* a cuOff */
       GExpr*  gexpr; /* for this variable */
       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                         any */
@@ -1383,16 +1401,19 @@
 
 
 __attribute__((noinline))
-static void parse_var_DIE ( /*MOD*/XArray* /* of TempVar* */ tempvars,
-                            /*MOD*/XArray* /* of GExpr* */ gexprs,
-                            /*MOD*/D3VarParser* parser,
-                            DW_TAG dtag,
-                            UWord posn,
-                            Int level,
-                            Cursor* c_die,
-                            Cursor* c_abbv,
-                            CUConst* cc,
-                            Bool td3 )
+static void parse_var_DIE (
+   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
+   /*MOD*/XArray* /* of TempVar* */ tempvars,
+   /*MOD*/XArray* /* of GExpr* */ gexprs,
+   /*MOD*/D3VarParser* parser,
+   DW_TAG dtag,
+   UWord posn,
+   Int level,
+   Cursor* c_die,
+   Cursor* c_abbv,
+   CUConst* cc,
+   Bool td3
+)
 {
    ULong       cts;
    Int         ctsSzB;
@@ -1592,7 +1613,7 @@
 
    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
       UChar* name        = NULL;
-      Type*  typeR       = D3_INVALID_CUOFF;
+      UWord  typeR       = D3_INVALID_CUOFF;
       Bool   external    = False;
       GExpr* gexpr       = NULL;
       Int    n_attrs     = 0;
@@ -1618,7 +1639,7 @@
             VG_(addToXA)(gexprs, &gexpr);
          }
          if (attr == DW_AT_type && ctsSzB > 0) {
-            typeR = (Type*)(UWord)cts;
+            typeR = (UWord)cts;
          }
          if (attr == DW_AT_external && ctsSzB > 0 && cts > 0) {
             external = True;
@@ -1648,7 +1669,7 @@
          (1) has location and type    -> completed
          (2) has type only            -> is an abstract instance
          (3) has location and abs_ori -> is a concrete instance
-         Name, filename and line number are all option frills.
+         Name, filename and line number are all optional frills.
       */
       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF) 
            /* 2 */ || (typeR != D3_INVALID_CUOFF)
@@ -1714,7 +1735,7 @@
          nRanges = VG_(sizeXA)(xa);
          vg_assert(nRanges >= 0);
 
-         tv = ML_(dinfo_zalloc)( sizeof(TempVar) );
+         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
          tv->name   = name;
          tv->level  = external ? 0 : parser->sp;
          tv->typeR  = typeR;
@@ -1737,7 +1758,20 @@
             tv->rngOneMax = range->aMax;
          }
          else if (nRanges > 1) {
-            tv->rngMany = VG_(cloneXA)( xa ); /* free when 'tv' freed */
+            /* See if we already have a range list which is
+               structurally identical.  If so, use that; if not, clone
+               this one, and add it to our collection. */
+            UWord keyW, valW;
+            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
+               XArray* old = (XArray*)keyW;
+               tl_assert(valW == 0);
+               tl_assert(old != xa);
+               tv->rngMany = old;
+            } else {
+               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
+               tv->rngMany = cloned;
+               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
+            }
          }
 
          VG_(addToXA)( tempvars, &tv );
@@ -1747,11 +1781,11 @@
          /* collect stats on how effective the ->ranges special
             casing is */
          if (0) {
-           static Int ntot=0, ngt=0;
-           ntot++;
-           if (tv->rngMany) ngt++;
-           if (0 == (ntot % 100000))
-              VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
+            static Int ntot=0, ngt=0;
+            ntot++;
+            if (tv->rngMany) ngt++;
+            if (0 == (ntot % 100000))
+               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
          }
 
       }
@@ -1870,7 +1904,12 @@
       /* A stack of types which are currently under construction */
       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
                    stack */
-      Type* qparent[N_D3_TYPE_STACK];
+      /* Note that the TyEnts in qparentE are temporary copies of the
+         ones accumulating in the main tyent array.  So it is not safe
+         to free up anything on them when popping them off the stack
+         (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
+         memset them to zero when done. */
+      TyEnt qparentE[N_D3_TYPE_STACK]; /* parent TyEnts */
       Int   qlevel[N_D3_TYPE_STACK];
 
    }
@@ -1881,7 +1920,7 @@
    VG_(printf)("  typestack (%s) {\n", str);
    for (i = 0; i <= parser->sp; i++) {
       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
-      ML_(pp_Type)( parser->qparent[i] );
+      ML_(pp_TyEnt)( &parser->qparentE[i] );
       VG_(printf)("\n");
    }
    VG_(printf)("  }\n");
@@ -1899,9 +1938,11 @@
       if (parser->qlevel[parser->sp] <= level) break;
       if (0) 
          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
-      vg_assert(parser->qparent[parser->sp]);
-      parser->qparent[parser->sp] = NULL;
-      parser->qlevel[parser->sp]  = 0;
+      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
+      VG_(memset)(&parser->qparentE[parser->sp], 0, sizeof(TyEnt));
+      parser->qparentE[parser->sp].cuOff = D3_INVALID_CUOFF;
+      parser->qparentE[parser->sp].tag = Te_EMPTY;
+      parser->qlevel[parser->sp] = 0;
       parser->sp--;
       changed = True;
    }
@@ -1917,10 +1958,10 @@
 static void typestack_push ( CUConst* cc,
                              D3TypeParser* parser,
                              Bool td3,
-                             Type* parent, Int level ) {
+                             TyEnt* parentE, Int level ) {
    if (0)
-   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %p\n",
-            parser->sp+1, level, parent);
+   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
+            parser->sp+1, level, parentE->cuOff);
 
    /* First we need to zap everything >= 'level', as we are about to
       replace any previous entry at 'level', so .. */
@@ -1934,10 +1975,12 @@
    if (parser->sp >= 0)
       vg_assert(parser->qlevel[parser->sp] < level);
    parser->sp++;
-   vg_assert(parser->qparent[parser->sp] == NULL);
+   vg_assert(parser->qparentE[parser->sp].tag == Te_EMPTY);
    vg_assert(parser->qlevel[parser->sp]  == 0);
-   vg_assert(parent != NULL);
-   parser->qparent[parser->sp] = parent;
+   vg_assert(parentE);
+   vg_assert(ML_(TyEnt__is_type)(parentE));
+   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
+   parser->qparentE[parser->sp] = *parentE;
    parser->qlevel[parser->sp]  = level;
    if (td3)
       typestack_show( parser, "after push" );
@@ -1952,9 +1995,27 @@
 
    We may find the DIE uninteresting, in which case we should ignore
    it.
+
+   What happens: the DIE is examined.  If uninteresting, it is ignored.
+   Otherwise, the DIE gives rise to two things:
+
+   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
+   (2) a TyAdmin structure, which holds the type, or related stuff
+
+   (2) is added at the end of 'tyadmins', at some index, say 'i'.
+
+   A pair (cuOffset, i) is added to 'tydict'.
+
+   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
+   a mapping from cuOffset to the index of the corresponding entry in
+   'tyadmin'.
+
+   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
+   in the tydict (by binary search).  This gives an index into
+   tyadmins, and the required entity lives in tyadmins at that index.
 */
 __attribute__((noinline))
-static void parse_type_DIE ( /*MOD*/XArray* /* of TyAdmin */ admin,
+static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
                              /*MOD*/D3TypeParser* parser,
                              DW_TAG dtag,
                              UWord posn,
@@ -1964,19 +2025,22 @@
                              CUConst* cc,
                              Bool td3 )
 {
-   ULong     cts;
-   Int       ctsSzB;
-   UWord     ctsMemSzB;
-   Type*     type   = NULL;
-   TyAtom*   atom   = NULL;
-   TyField*  field  = NULL;
-   D3Expr*   expr   = NULL;
-   TyBounds* bounds = NULL;
-   TyAdmin   tyad;
+   ULong cts;
+   Int   ctsSzB;
+   UWord ctsMemSzB;
+   TyEnt typeE;
+   TyEnt atomE;
+   TyEnt fieldE;
+   TyEnt boundE;
 
    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
    UWord saved_abbv_c_offset = get_position_of_Cursor( c_abbv );
 
+   VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
+   VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
+   VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
+   VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
+
    /* If we've returned to a level at or above any previously noted
       parent, un-note it, so we don't believe we're still collecting
       its children. */
@@ -2020,8 +2084,9 @@
 
    if (dtag == DW_TAG_base_type) {
       /* We can pick up a new base type any time. */
-      type = ML_(new_Type)();
-      type->tag = Ty_Base;
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = D3_INVALID_CUOFF;
+      typeE.tag   = Te_TyBase;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2029,23 +2094,24 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            type->Ty.Base.name
-               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            typeE.Te.TyBase.name
+               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.1",
+                                    (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_byte_size && ctsSzB > 0) {
-            type->Ty.Base.szB = cts;
+            typeE.Te.TyBase.szB = cts;
          }
          if (attr == DW_AT_encoding && ctsSzB > 0) {
             switch (cts) {
                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
                case DW_ATE_boolean:/* FIXME - is this correct? */
-                  type->Ty.Base.enc = 'U'; break;
+                  typeE.Te.TyBase.enc = 'U'; break;
                case DW_ATE_signed: case DW_ATE_signed_char:
-                  type->Ty.Base.enc = 'S'; break;
+                  typeE.Te.TyBase.enc = 'S'; break;
                case DW_ATE_float:
-                  type->Ty.Base.enc = 'F'; break;
+                  typeE.Te.TyBase.enc = 'F'; break;
                case DW_ATE_complex_float:
-                  type->Ty.Base.enc = 'C'; break;
+                  typeE.Te.TyBase.enc = 'C'; break;
                default:
                   goto bad_DIE;
             }
@@ -2054,21 +2120,22 @@
 
       /* Invent a name if it doesn't have one.  gcc-4.3
          -ftree-vectorize is observed to emit nameless base types. */
-      if (!type->Ty.Base.name)
-         type->Ty.Base.name 
-            = ML_(addStr)( cc->di, "<anon_base_type>", -1 );
+      if (!typeE.Te.TyBase.name)
+         typeE.Te.TyBase.name 
+            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
+                                 "<anon_base_type>" );
 
       /* Do we have something that looks sane? */
       if (/* must have a name */
-          type->Ty.Base.name == NULL
+          typeE.Te.TyBase.name == NULL
           /* and a plausible size.  Yes, really 32: "complex long
              double" apparently has size=32 */
-          || type->Ty.Base.szB < 0 || type->Ty.Base.szB > 32
+          || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
           /* and a plausible encoding */
-          || (type->Ty.Base.enc != 'U'
-              && type->Ty.Base.enc != 'S' 
-              && type->Ty.Base.enc != 'F'
-              && type->Ty.Base.enc != 'C'))
+          || (typeE.Te.TyBase.enc != 'U'
+              && typeE.Te.TyBase.enc != 'S' 
+              && typeE.Te.TyBase.enc != 'F'
+              && typeE.Te.TyBase.enc != 'C'))
          goto bad_DIE;
       /* Last minute hack: if we see this
          <1><515>: DW_TAG_base_type
@@ -2076,12 +2143,13 @@
              DW_AT_encoding    : 5
              DW_AT_name        : void
          convert it into a real Void type. */
-      if (type->Ty.Base.szB == 0
-          && 0 == VG_(strcmp)("void", type->Ty.Base.name)) {
-         VG_(memset)(type, 0, sizeof(*type));
-         type->tag = Ty_Void;
-         type->Ty.Void.isFake = False; /* it's a real one! */
+      if (typeE.Te.TyBase.szB == 0
+          && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
+         ML_(TyEnt__make_EMPTY)(&typeE);
+         typeE.tag = Te_TyVoid;
+         typeE.Te.TyVoid.isFake = False; /* it's a real one! */
       }
+
       goto acquire_Type;
    }
 
@@ -2090,20 +2158,21 @@
       /* This seems legit for _pointer_type and _reference_type.  I
          don't know if rolling _ptr_to_member_type in here really is
          legit, but it's better than not handling it at all. */
-      type = ML_(new_Type)();
-      type->tag = Ty_PorR;
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = D3_INVALID_CUOFF;
+      typeE.tag   = Te_TyPorR;
       /* target type defaults to void */
-      type->Ty.PorR.typeR = D3_FAKEVOID_CUOFF;
-      type->Ty.PorR.isPtr = dtag == DW_TAG_pointer_type
-                            || dtag == DW_TAG_ptr_to_member_type;
+      typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
+      typeE.Te.TyPorR.isPtr = dtag == DW_TAG_pointer_type
+                              || dtag == DW_TAG_ptr_to_member_type;
       /* Pointer types don't *have* to specify their size, in which
          case we assume it's a machine word.  But if they do specify
          it, it must be a machine word :-) This probably assumes that
          the word size of the Dwarf3 we're reading is the same size as
          that on the machine.  gcc appears to give a size whereas icc9
          doesn't. */
-      if (type->Ty.PorR.isPtr)
-         type->Ty.PorR.szB = sizeof(Word);
+      if (typeE.Te.TyPorR.isPtr)
+         typeE.Te.TyPorR.szB = sizeof(Word);
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2111,14 +2180,14 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_byte_size && ctsSzB > 0) {
-            type->Ty.PorR.szB = cts;
+            typeE.Te.TyPorR.szB = cts;
          }
          if (attr == DW_AT_type && ctsSzB > 0) {
-            type->Ty.PorR.typeR = (Type*)(UWord)cts;
+            typeE.Te.TyPorR.typeR = (UWord)cts;
          }
       }
       /* Do we have something that looks sane? */
-      if (type->Ty.PorR.szB != sizeof(Word))
+      if (typeE.Te.TyPorR.szB != sizeof(Word))
          goto bad_DIE;
       else
          goto acquire_Type;
@@ -2126,12 +2195,13 @@
 
    if (dtag == DW_TAG_enumeration_type) {
       /* Create a new Type to hold the results. */
-      type = ML_(new_Type)();
-      type->tag = Ty_Enum;
-      type->Ty.Enum.name = NULL;
-      type->Ty.Enum.atomRs
-         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
-                       sizeof(TyAtom*) );
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = posn;
+      typeE.tag   = Te_TyEnum;
+      typeE.Te.TyEnum.atomRs
+         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1", 
+                       ML_(dinfo_free),
+                       sizeof(UWord) );
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2139,25 +2209,28 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            type->Ty.Enum.name
-               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            typeE.Te.TyEnum.name
+              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.2",
+                                   (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_byte_size && ctsSzB > 0) {
-            type->Ty.Enum.szB = cts;
+            typeE.Te.TyEnum.szB = cts;
          }
       }
       /* Do we have something that looks sane? */
-      if (type->Ty.Enum.szB == 0 /* we must know the size */
-          /* But the name can be present, or not */)
+      if (typeE.Te.TyEnum.szB == 0 /* we must know the size */
+         /* But the name can be present, or not */)
          goto bad_DIE;
       /* On't stack! */
-      typestack_push( cc, parser, td3, type, level );
+      typestack_push( cc, parser, td3, &typeE, level );
       goto acquire_Type;
    }
 
    if (dtag == DW_TAG_enumerator) {
       Bool have_value = False;
-      atom = ML_(new_TyAtom)( NULL, 0 );
+      VG_(memset)( &atomE, 0, sizeof(atomE) );
+      atomE.cuOff = posn;
+      atomE.tag   = Te_Atom;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2165,24 +2238,28 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            atom->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            atomE.Te.Atom.name 
+              = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enumerator.1",
+                                   (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_const_value && ctsSzB > 0) {
-            atom->value = cts;
+            atomE.Te.Atom.value = cts;
             have_value = True;
          }
       }
       /* Do we have something that looks sane? */
-      if ((!have_value) || atom->name == NULL)
+      if ((!have_value) || atomE.Te.Atom.name == NULL)
          goto bad_DIE;
       /* Do we have a plausible parent? */
       if (typestack_is_empty(parser)) goto bad_DIE;
-      vg_assert(parser->qparent[parser->sp]);
+      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
+      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
-      if (parser->qparent[parser->sp]->tag != Ty_Enum) goto bad_DIE;
+      if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto bad_DIE;
       /* Record this child in the parent */
-      vg_assert(parser->qparent[parser->sp]->Ty.Enum.atomRs);
-      VG_(addToXA)( parser->qparent[parser->sp]->Ty.Enum.atomRs, &atom );
+      vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
+      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
+                    &atomE );
       /* And record the child itself */
       goto acquire_Atom;
    }
@@ -2196,15 +2273,17 @@
       Bool is_decl  = False;
       Bool is_spec  = False;
       /* Create a new Type to hold the results. */
-      type = ML_(new_Type)();
-      type->tag = Ty_StOrUn;
-      type->Ty.StOrUn.name = NULL;
-      type->Ty.StOrUn.fields
-         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
-                       sizeof(TyAtom*) );
-      type->Ty.StOrUn.complete = True;
-      type->Ty.StOrUn.isStruct = dtag == DW_TAG_structure_type 
-                                 || dtag == DW_TAG_class_type;
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = posn;
+      typeE.tag   = Te_TyStOrUn;
+      typeE.Te.TyStOrUn.name = NULL;
+      typeE.Te.TyStOrUn.fieldRs
+         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1", 
+                       ML_(dinfo_free),
+                       sizeof(UWord) );
+      typeE.Te.TyStOrUn.complete = True;
+      typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type 
+                                   || dtag == DW_TAG_class_type;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2212,11 +2291,12 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            type->Ty.StOrUn.name
-               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            typeE.Te.TyStOrUn.name
+               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.2",
+                                    (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_byte_size && ctsSzB >= 0) {
-            type->Ty.StOrUn.szB = cts;
+            typeE.Te.TyStOrUn.szB = cts;
             have_szB = True;
          }
          if (attr == DW_AT_declaration && ctsSzB > 0 && cts > 0) {
@@ -2230,9 +2310,9 @@
       if (is_decl && (!is_spec)) {
          /* It's a DW_AT_declaration.  We require the name but
             nothing else. */
-         if (type->Ty.StOrUn.name == NULL)
+         if (typeE.Te.TyStOrUn.name == NULL)
             goto bad_DIE;
-         type->Ty.StOrUn.complete = False;
+         typeE.Te.TyStOrUn.complete = False;
          goto acquire_Type;
       }
       if ((!is_decl) /* && (!is_spec) */) {
@@ -2241,7 +2321,7 @@
              /* But the name can be present, or not */)
             goto bad_DIE;
          /* On't stack! */
-         typestack_push( cc, parser, td3, type, level );
+         typestack_push( cc, parser, td3, &typeE, level );
          goto acquire_Type;
       }
       else {
@@ -2256,9 +2336,10 @@
          members must have a DW_AT_data_member_location expression
          whereas union members must not. */
       Bool parent_is_struct;
-      field = ML_(new_TyField)( NULL, NULL, NULL );
-      field->typeR = D3_INVALID_CUOFF;
-      expr  = NULL;
+      VG_(memset)( &fieldE, 0, sizeof(fieldE) );
+      fieldE.cuOff = posn;
+      fieldE.tag   = Te_Field;
+      fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2266,22 +2347,27 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            field->name = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            fieldE.Te.Field.name
+               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.1",
+                                    (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_type && ctsSzB > 0) {
-            field->typeR = (Type*)(UWord)cts;
+            fieldE.Te.Field.typeR = (UWord)cts;
          }
          if (attr == DW_AT_data_member_location && ctsMemSzB > 0) {
-            UChar* copy = ML_(addStr)( cc->di, (UChar*)(UWord)cts, 
-                                               (Int)ctsMemSzB );
-            expr = ML_(new_D3Expr)( copy, (UWord)ctsMemSzB );
+            fieldE.Te.Field.nLoc = (UWord)ctsMemSzB;
+            fieldE.Te.Field.loc
+               = ML_(dinfo_memdup)( "di.readdwarf3.ptD.member.2",
+                                    (UChar*)(UWord)cts, 
+                                    (SizeT)fieldE.Te.Field.nLoc );
          }
       }
       /* Do we have a plausible parent? */
       if (typestack_is_empty(parser)) goto bad_DIE;
-      vg_assert(parser->qparent[parser->sp]);
+      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
+      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
-      if (parser->qparent[parser->sp]->tag != Ty_StOrUn) goto bad_DIE;
+      if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto bad_DIE;
       /* Do we have something that looks sane?  If this a member of a
          struct, we must have a location expression; but if a member
          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
@@ -2289,37 +2375,42 @@
          observed to emit constant-zero expressions.  So just ignore
          them. */
       parent_is_struct
-         = parser->qparent[parser->sp]->Ty.StOrUn.isStruct;
-      if (!field->name)
-         field->name = ML_(addStr)(cc->di, "<anon_field>", -1);
-      if ((!field->name) || (field->typeR == D3_INVALID_CUOFF))
+         = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
+      if (!fieldE.Te.Field.name)
+         fieldE.Te.Field.name
+            = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
+                                 "<anon_field>" );
+      vg_assert(fieldE.Te.Field.name);
+      if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
          goto bad_DIE;
-      if (parent_is_struct && (!expr))
+      if (parent_is_struct && (!fieldE.Te.Field.loc))
          goto bad_DIE;
-      if ((!parent_is_struct) && expr) {
+      if ((!parent_is_struct) && fieldE.Te.Field.loc) {
          /* If this is a union type, pretend we haven't seen the data
             member location expression, as it is by definition
             redundant (it must be zero). */
-         expr = NULL;
+         ML_(dinfo_free)(fieldE.Te.Field.loc);
+         fieldE.Te.Field.loc  = NULL;
+         fieldE.Te.Field.nLoc = 0;
       }
       /* Record this child in the parent */
-      field->isStruct = parent_is_struct;
-      if (expr)
-         field->loc = expr;
-      vg_assert(parser->qparent[parser->sp]->Ty.StOrUn.fields);
-      VG_(addToXA)( parser->qparent[parser->sp]->Ty.StOrUn.fields,
-                    &field );
+      fieldE.Te.Field.isStruct = parent_is_struct;
+      vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
+      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
+                    &posn );
       /* And record the child itself */
-      goto acquire_Field_and_Expr;
+      goto acquire_Field;
    }
 
    if (dtag == DW_TAG_array_type) {
-      type = ML_(new_Type)();
-      type->tag = Ty_Array;
-      type->Ty.Array.typeR = D3_INVALID_CUOFF;
-      type->Ty.Array.bounds
-         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
-                       sizeof(TyBounds*) );
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = posn;
+      typeE.tag   = Te_TyArray;
+      typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
+      typeE.Te.TyArray.boundRs
+         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
+                       ML_(dinfo_free),
+                       sizeof(UWord) );
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2327,13 +2418,13 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_type && ctsSzB > 0) {
-            type->Ty.Array.typeR = (Type*)(UWord)cts;
+            typeE.Te.TyArray.typeR = (UWord)cts;
          }
       }
-      if (type->Ty.Array.typeR == D3_INVALID_CUOFF)
+      if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
          goto bad_DIE;
       /* On't stack! */
-      typestack_push( cc, parser, td3, type, level );
+      typestack_push( cc, parser, td3, &typeE, level );
       goto acquire_Type;
    }
 
@@ -2352,7 +2443,10 @@
          default:  vg_assert(0); /* assured us by handling of
                                     DW_TAG_compile_unit in this fn */
       }
-      bounds = ML_(new_TyBounds)();
+
+      VG_(memset)( &boundE, 0, sizeof(boundE) );
+      boundE.cuOff = D3_INVALID_CUOFF;
+      boundE.tag   = Te_Bound;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2378,41 +2472,44 @@
          type) */
       /* Do we have a plausible parent? */
       if (typestack_is_empty(parser)) goto bad_DIE;
-      vg_assert(parser->qparent[parser->sp]);
+      vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
+      vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
       if (level != parser->qlevel[parser->sp]+1) goto bad_DIE;
-      if (parser->qparent[parser->sp]->tag != Ty_Array) goto bad_DIE;
+      if (parser->qparentE[parser->sp].tag != Te_TyArray) goto bad_DIE;
 
       /* Figure out if we have a definite range or not */
       if (have_lower && have_upper && (!have_count)) {
-         bounds->knownL = True;
-         bounds->knownU = True;
-         bounds->boundL = lower;
-         bounds->boundU = upper;
+         boundE.Te.Bound.knownL = True;
+         boundE.Te.Bound.knownU = True;
+         boundE.Te.Bound.boundL = lower;
+         boundE.Te.Bound.boundU = upper;
       } 
       else if (have_lower && (!have_upper) && (!have_count)) {
-         bounds->knownL = True;
-         bounds->knownU = False;
-         bounds->boundL = lower;
-         bounds->boundU = 0;
+         boundE.Te.Bound.knownL = True;
+         boundE.Te.Bound.knownU = False;
+         boundE.Te.Bound.boundL = lower;
+         boundE.Te.Bound.boundU = 0;
       } else {
          /* FIXME: handle more cases */
          goto bad_DIE;
       }
 
       /* Record this bound in the parent */
-      vg_assert(parser->qparent[parser->sp]->Ty.Array.bounds);
-      VG_(addToXA)( parser->qparent[parser->sp]->Ty.Array.bounds,
-                    &bounds );
+      boundE.cuOff = posn;
+      vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
+      VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
+                    &boundE );
       /* And record the child itself */
-      goto acquire_Bounds;
+      goto acquire_Bound;
    }
 
    if (dtag == DW_TAG_typedef) {
-      /* We can pick up a new base type any time. */
-      type = ML_(new_Type)();
-      type->tag = Ty_TyDef;
-      type->Ty.TyDef.name = NULL;
-      type->Ty.TyDef.typeR = D3_INVALID_CUOFF;
+      /* We can pick up a new typedef any time. */
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = D3_INVALID_CUOFF;
+      typeE.tag   = Te_TyTyDef;
+      typeE.Te.TyTyDef.name = NULL;
+      typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2420,16 +2517,17 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_name && ctsMemSzB > 0) {
-            type->Ty.TyDef.name
-               = ML_(addStr)( cc->di, (UChar*)(UWord)cts, -1 );
+            typeE.Te.TyTyDef.name
+               = ML_(dinfo_strdup)( "di.readdwarf3.ptD.typedef.1",
+                                    (UChar*)(UWord)cts );
          }
          if (attr == DW_AT_type && ctsSzB > 0) {
-            type->Ty.TyDef.typeR = (Type*)(UWord)cts;
+            typeE.Te.TyTyDef.typeR = (UWord)cts;
          }
       }
       /* Do we have something that looks sane? */
       if (/* must have a name */
-          type->Ty.TyDef.name == NULL
+          typeE.Te.TyTyDef.name == NULL
           /* but the referred-to type can be absent */)
          goto bad_DIE;
       else
@@ -2439,19 +2537,21 @@
    if (dtag == DW_TAG_subroutine_type) {
       /* function type? just record that one fact and ask no
          further questions. */
-      type = ML_(new_Type)();
-      type->tag = Ty_Fn;
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = D3_INVALID_CUOFF;
+      typeE.tag   = Te_TyFn;
       goto acquire_Type;
    }
 
    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type) {
       Int have_ty = 0;
-      type = ML_(new_Type)();
-      type->tag = Ty_Qual;
-      type->Ty.Qual.qual
+      VG_(memset)(&typeE, 0, sizeof(typeE));
+      typeE.cuOff = D3_INVALID_CUOFF;
+      typeE.tag   = Te_TyQual;
+      typeE.Te.TyQual.qual
          = dtag == DW_TAG_volatile_type ? 'V' : 'C';
       /* target type defaults to 'void' */
-      type->Ty.Qual.typeR = D3_FAKEVOID_CUOFF;
+      typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
       while (True) {
          DW_AT   attr = (DW_AT)  get_ULEB128( c_abbv );
          DW_FORM form = (DW_FORM)get_ULEB128( c_abbv );
@@ -2459,7 +2559,7 @@
          get_Form_contents( &cts, &ctsSzB, &ctsMemSzB,
                             cc, c_die, False/*td3*/, form );
          if (attr == DW_AT_type && ctsSzB > 0) {
-            type->Ty.Qual.typeR = (Type*)(UWord)cts;
+            typeE.Te.TyQual.typeR = (UWord)cts;
             have_ty++;
          }
       }
@@ -2478,57 +2578,45 @@
 
   acquire_Type:
    if (0) VG_(printf)("YYYY Acquire Type\n");
-   vg_assert(type); vg_assert(!atom); vg_assert(!field);
-   vg_assert(!expr); vg_assert(!bounds);
-   VG_(memset)( &tyad, 0, sizeof(tyad) );
-   tyad.cuOff   = posn;
-   tyad.payload = type;
-   tyad.tag     = TyA_Type;
-   VG_(addToXA)( admin, &tyad );
+   vg_assert(ML_(TyEnt__is_type)( &typeE ));
+   vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
+   typeE.cuOff = posn;
+   VG_(addToXA)( tyents, &typeE );
    return;
    /*NOTREACHED*/
 
   acquire_Atom:
    if (0) VG_(printf)("YYYY Acquire Atom\n");
-   vg_assert(!type); vg_assert(atom); vg_assert(!field);
-   vg_assert(!expr); vg_assert(!bounds);
-   VG_(memset)( &tyad, 0, sizeof(tyad) );
-   tyad.cuOff   = posn;
-   tyad.payload = atom;
-   tyad.tag     = TyA_Atom;
-   VG_(addToXA)( admin, &tyad );
+   vg_assert(atomE.tag == Te_Atom);
+   vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
+   atomE.cuOff = posn;
+   VG_(addToXA)( tyents, &atomE );
    return;
    /*NOTREACHED*/
 
-  acquire_Field_and_Expr:
+  acquire_Field:
    /* For union members, Expr should be absent */
-   if (0) VG_(printf)("YYYY Acquire Field and Expr\n");
-   vg_assert(!type); vg_assert(!atom); vg_assert(field); 
-   /*vg_assert(expr);*/ vg_assert(!bounds);
-   if (expr) {
-      VG_(memset)( &tyad, 0, sizeof(tyad) );
-      tyad.cuOff   = (UWord)D3_INVALID_CUOFF;
-      tyad.payload = expr;
-      tyad.tag     = TyA_Expr;
-      VG_(addToXA)( admin, &tyad );
+   if (0) VG_(printf)("YYYY Acquire Field\n");
+   vg_assert(fieldE.tag == Te_Field);
+   vg_assert( (fieldE.Te.Field.nLoc > 0 && fieldE.Te.Field.loc != NULL)
+              || (fieldE.Te.Field.nLoc == 0 && fieldE.Te.Field.loc == NULL) );
+   if (fieldE.Te.Field.isStruct) {
+      vg_assert(fieldE.Te.Field.nLoc > 0);
+   } else {
+      vg_assert(fieldE.Te.Field.nLoc == 0);
    }
-   VG_(memset)( &tyad, 0, sizeof(tyad) );
-   tyad.cuOff   = posn;
-   tyad.payload = field;
-   tyad.tag     = TyA_Field;
-   VG_(addToXA)( admin, &tyad );
+   vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
+   fieldE.cuOff = posn;
+   VG_(addToXA)( tyents, &fieldE );
    return;
    /*NOTREACHED*/
 
-  acquire_Bounds:
-   if (0) VG_(printf)("YYYY Acquire Bounds\n");
-   vg_assert(!type); vg_assert(!atom); vg_assert(!field);
-   vg_assert(!expr); vg_assert(bounds);
-   VG_(memset)( &tyad, 0, sizeof(tyad) );
-   tyad.cuOff   = posn;
-   tyad.payload = bounds;
-   tyad.tag     = TyA_Bounds;
-   VG_(addToXA)( admin, &tyad );
+  acquire_Bound:
+   if (0) VG_(printf)("YYYY Acquire Bound\n");
+   vg_assert(boundE.tag == Te_Bound);
+   vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
+   boundE.cuOff = posn;
+   VG_(addToXA)( tyents, &boundE );
    return;
    /*NOTREACHED*/
 
@@ -2555,235 +2643,321 @@
 
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
+/*--- Compression of type DIE information                  ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+/* Return the INDIR target of 'cuOff' (or 'cuOff' itself); sets *changed. */
+static UWord chase_cuOff ( Bool* changed,
+                           XArray* /* of TyEnt */ ents,
+                           TyEntIndexCache* ents_cache,
+                           UWord cuOff )
+{
+   TyEnt* ent;
+   ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
+   /* No entry at this offset: report it and leave the reference as is. */
+   if (!ent) {
+      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
+      *changed = False;
+      return cuOff;
+   }
+
+   vg_assert(ent->tag != Te_EMPTY);
+   if (ent->tag != Te_INDIR) {
+      *changed = False;
+      return cuOff;
+   } else {
+      vg_assert(ent->Te.INDIR.indR < cuOff); /* must point backwards */
+      *changed = True;
+      return ent->Te.INDIR.indR; /* one hop; not chased further */
+   }
+}
+/* Apply chase_cuOff to every element of 'cuOffs', rewriting them in place. */
+static
+void chase_cuOffs_in_XArray ( Bool* changed,
+                              XArray* /* of TyEnt */ ents,
+                              TyEntIndexCache* ents_cache,
+                              /*MOD*/XArray* /* of UWord */ cuOffs )
+{
+   Bool b2 = False; /* accumulates the per-element 'changed' flags */
+   Word i, n = VG_(sizeXA)( cuOffs );
+   for (i = 0; i < n; i++) {
+      Bool   b = False;
+      UWord* p = VG_(indexXA)( cuOffs, i );
+      *p = chase_cuOff( &b, ents, ents_cache, *p );
+      if (b)
+         b2 = True;
+   }
+   *changed = b2; /* True iff at least one element was redirected */
+}
+/* Chase every cuOff reference held in 'te'; return True if any changed. */
+static Bool TyEnt__subst_R_fields ( XArray* /* of TyEnt */ ents,
+                                    TyEntIndexCache* ents_cache,
+                                    /*MOD*/TyEnt* te )
+{
+   Bool b, changed = False; /* b: one chase's result; changed: any of them */
+   switch (te->tag) {
+      case Te_EMPTY: /* as for the other bare-'break' tags: nothing chased */
+         break;
+      case Te_INDIR:
+         te->Te.INDIR.indR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
+         if (b) changed = True;
+         break;
+      case Te_UNKNOWN:
+         break;
+      case Te_Atom:
+         break;
+      case Te_Field:
+         te->Te.Field.typeR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
+         if (b) changed = True;
+         break;
+      case Te_Bound:
+         break;
+      case Te_TyBase:
+         break;
+      case Te_TyPorR:
+         te->Te.TyPorR.typeR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
+         if (b) changed = True;
+         break;
+      case Te_TyTyDef:
+         te->Te.TyTyDef.typeR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
+         if (b) changed = True;
+         break;
+      case Te_TyStOrUn:
+         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
+         if (b) changed = True;
+         break;
+      case Te_TyEnum:
+         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
+         if (b) changed = True;
+         break;
+      case Te_TyArray: /* both the single typeR and the boundRs array */
+         te->Te.TyArray.typeR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
+         if (b) changed = True;
+         chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
+         if (b) changed = True;
+         break;
+      case Te_TyFn:
+         break;
+      case Te_TyQual:
+         te->Te.TyQual.typeR
+            = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
+         if (b) changed = True;
+         break;
+      case Te_TyVoid:
+         break;
+      default: /* unexpected tag: print the entry, then fail an assertion */
+         ML_(pp_TyEnt)(te);
+         vg_assert(0);
+   }
+   return changed;
+}
+
+/* Make a pass over 'ents'.  For each tyent, inspect the target of any
+   'R' or 'Rs' fields (those which refer to other tyents), and replace
+   any which point to INDIR nodes with the target of the indirection
+   (which should not itself be an indirection).  In summary: short out
+   references to INDIR nodes.  Returns the number of tyents changed. */
+static
+Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
+                                     TyEntIndexCache* ents_cache )
+{
+   Word i, n, nChanged = 0;
+   Bool b;
+   n = VG_(sizeXA)( ents );
+   for (i = 0; i < n; i++) {
+      TyEnt* ent = VG_(indexXA)( ents, i );
+      vg_assert(ent->tag != Te_EMPTY);
+      /* We have to substitute everything, even indirections, so as to
+         ensure that chains of indirections don't build up. */
+      b = TyEnt__subst_R_fields( ents, ents_cache, ent );
+      if (b)
+         nChanged++; /* counted once per tyent, not once per field */
+   }
+
+   return nChanged;
+}
+
+
+/* Make a pass over 'ents', building a dictionary of TyEnts as we go.
+   Look up each new tyent in the dictionary in turn.  If it is already
+   in the dictionary, replace this tyent with an indirection to the
+   existing one, and delete any malloc'd stuff hanging off this one.
+   In summary: common up all tyents that are identical as defined by
+   TyEnt__cmp_by_all_except_cuOff.  Returns the number replaced. */
+static
+Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
+{
+   Word    n, i, nDeleted;
+   WordFM* dict; /* TyEnt* -> void */
+   TyEnt*  ent;
+   UWord   keyW, valW;
+
+   dict = VG_(newFM)(
+             ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1", 
+             ML_(dinfo_free),
+             (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
+          );
+
+   nDeleted = 0;
+   n = VG_(sizeXA)( ents );
+   for (i = 0; i < n; i++) {
+      ent = VG_(indexXA)( ents, i );
+      vg_assert(ent->tag != Te_EMPTY);
+     
+      /* Ignore indirections, although check that they are
+         not forming a cycle. */
+      if (ent->tag == Te_INDIR) {
+         vg_assert(ent->Te.INDIR.indR < ent->cuOff);
+         continue;
+      }
+
+      keyW = valW = 0;
+      if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
+         /* it's already in the dictionary. */
+         TyEnt* old = (TyEnt*)keyW;
+         vg_assert(valW == 0);
+         vg_assert(old != ent);
+         vg_assert(old->tag != Te_INDIR);
+         /* since we are traversing the array in increasing order of
+            cuOff: */
+         vg_assert(old->cuOff < ent->cuOff); 
+         /* So anyway, dump this entry and replace it with an
+            indirection to the one in the dictionary.  Note that the
+            assertion above guarantees that we cannot create cycles of
+            indirections, since we are always creating an indirection
+            to a tyent with a cuOff lower than this one. */
+         ML_(TyEnt__make_EMPTY)( ent );
+         ent->tag = Te_INDIR;
+         ent->Te.INDIR.indR = old->cuOff;
+         nDeleted++;
+      } else {
+         /* not in dictionary; add it and keep going. */
+         VG_(addToFM)( dict, (UWord)ent, 0 );
+      }
+   }
+
+   VG_(deleteFM)( dict, NULL, NULL );
+
+   return nDeleted;
+}
+
+/* Sort 'ents' by cuOff, then common up duplicates until gains are small. */
+static
+void dedup_types ( Bool td3, 
+                   /*MOD*/XArray* /* of TyEnt */ ents,
+                   TyEntIndexCache* ents_cache )
+{
+   Word m, n, i, nDel, nSubst, nThresh;
+   if (0) td3 = True;
+
+   n = VG_(sizeXA)( ents );
+
+   /* If a commoning pass and a substitution pass both make fewer than
+      this many changes, just stop.  It's pointless to burn up CPU
+      time trying to compress the last 1% or so out of the array. */
+   nThresh = n / 200;
+
+   /* First we must sort .ents by its .cuOff fields, so we
+      can index into it. */
+   VG_(setCmpFnXA)(
+      ents,
+      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
+   );
+   VG_(sortXA)( ents );
+
+   /* Now repeatedly do commoning and substitution passes over the
+      array, until neither pass makes more than nThresh changes. */
+   do {
+      nDel   = dedup_types_commoning_pass ( ents );
+      nSubst = dedup_types_substitution_pass ( ents, ents_cache );
+      vg_assert(nDel >= 0 && nSubst >= 0);
+      TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
+   } while (nDel > nThresh || nSubst > nThresh);
+
+   /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
+      In fact this should be true at the end of every loop iteration
+      above (a commoning pass followed by a substitution pass), but
+      checking it on every iteration is excessively expensive.  Note,
+      this loop also computes 'm' for the stats printing below it. */
+   m = 0;
+   n = VG_(sizeXA)( ents );
+   for (i = 0; i < n; i++) {
+      TyEnt *ent, *ind;
+      ent = VG_(indexXA)( ents, i );
+      if (ent->tag != Te_INDIR) continue;
+      m++; /* m counts the INDIR entries */
+      ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
+                                         ent->Te.INDIR.indR );
+      vg_assert(ind);
+      vg_assert(ind->tag != Te_INDIR);
+   }
+
+   TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
+}
+
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
 /*--- Resolution of references to type DIEs                ---*/
 /*---                                                      ---*/
 /*------------------------------------------------------------*/
 
-static Int cmp_D3TyAdmin_by_cuOff ( void* v1, void* v2 ) {
-   TyAdmin* a1 = (TyAdmin*)v1;
-   TyAdmin* a2 = (TyAdmin*)v2;
-   if (a1->cuOff < a2->cuOff) return -1;
-   if (a1->cuOff > a2->cuOff) return 1;
-   return 0;
-}
-
-/* Look up 'cuOff' in 'admin', to find the associated D3TyAdmin.
-   Check that the found D3TyAdmin has tag 'adtag'.  Sets *payload to
-   be the resulting payload pointer and returns True on success.
-
-   Also, if 'allow_invalid' is True, then if cuOff is
-   D3_INVALID_CUOFF, return NULL in *payload.
-
-   Otherwise (conceptually fails) and returns False.
-
-   Note that 'admin' has previously been sorted on its .cuOff fields,
-   so we can legitimately look up using them.
- */
-__attribute__((noinline))
-static Bool resolve_binding ( /*OUT*/void** payload,
-                              XArray* /* of TyAdmin */ admin, 
-                              void* cuOff,
-                              TyAdminTag tag, 
-                              Bool allow_invalid ) {
-   Bool    found;
-   Word    ixLo, ixHi;
-   TyAdmin dummy, *tyad;
-
-   if (cuOff == D3_INVALID_CUOFF) {
-      if (allow_invalid) {
-         *payload = NULL;
-         return True;
-      } else {
-         return False;
-      }
-   }
-
-   /* Hence ... */
-   tl_assert(cuOff != D3_INVALID_CUOFF);
-
-   VG_(memset)(&dummy, 0, sizeof(dummy));
-   dummy.cuOff = (UWord)cuOff;
-   found = VG_(lookupXA)( admin, &dummy, &ixLo, &ixHi );
-   if (!found)
-      return False;
-   /* If this doesn't hold, we must have seen more than one DIE with
-      the same cuOff(set).  Which isn't possible. */
-   vg_assert(ixLo == ixHi);
-   tyad = (TyAdmin*)VG_(indexXA)( admin, ixLo );
-   /* All payload pointers should be non-NULL.  Ensured by assertion in
-      loop in resolve_type_entities that creates 'map'.  Hence it is
-      safe to return NULL to indicate 'not found'. */
-   vg_assert(tyad->payload);
-   vg_assert(tyad->cuOff == (UWord)cuOff); /* stay sane */
-
-   if (tyad->tag != tag)
-      return False;
-
-   *payload = tyad->payload;
-   return True;
-}
-
-
-/* First, we sort the 'admin' array by its .cuOff fields.  That means
-   we can now hand it to resolve_binding() above, which simply looks
-   up the payload associated with a given cuOff value by doing a
-   binary search in 'admin'.
-
-   There is a subtlety that some of the .cuOff fields in 'admin' are
-   D3_INVALID_CUOFF, and we don't want anybody to be able to look up
-   anything against that value.  Rather than filter those out, we
-   leave them in, sort as usual, but arrange so that resolve_binding()
-   never looks up a D3_INVALID_CUOFF value.
-
-   Having done the sorting, we then work through the payload fields of
-   'admin' and convert all cuOff values into real pointers, by looking
-   them up using resolve_binding().  That's the whole purpose of this
-   resolution mechanism.  We also resolve the type expressions on the
-   supplied 'vars' array. */
+/* Make a pass through the (temporary) variables array.  Examine the
+   type of each variable, check that it is found, and chase any
+   Te_INDIRs.  Postcondition is: each variable has a typeR field that
+   refers to a valid type in tyents, or a Te_UNKNOWN, and is certainly
+   guaranteed not to refer to a Te_INDIR.  (This is so that we can
+   throw all the Te_INDIRs away later). */
 
 __attribute__((noinline))
-static void resolve_type_entities ( /*MOD*/XArray* /* of TyAdmin */ admin,
-                                    /*MOD*/XArray* /* of TempVar* */ vars )
+static void resolve_variable_types (
+               void (*barf)( HChar* ) __attribute__((noreturn)),
+               /*R-O*/XArray* /* of TyEnt */ ents,
+               /*MOD*/TyEntIndexCache* ents_cache,
+               /*MOD*/XArray* /* of TempVar* */ vars
+            )
 {
-   Bool     ok;
-   Word     i, n;
-   void*    payload;
-   TyAdmin* adp;
-
-   tl_assert(admin);
-
-   n = VG_(sizeXA)( admin );
-
-   VG_(setCmpFnXA)( admin, cmp_D3TyAdmin_by_cuOff );
-   if (0) 
-      VG_(printf)("XXXXXX sorting map with %d entries\n",
-                  (Int)VG_(sizeXA)(admin));
-   VG_(sortXA)( admin );
-
-   for (i = 0; i < n; i++) {
-      adp = (TyAdmin*)VG_(indexXA)( admin, i );
-      vg_assert(adp);
-      vg_assert(adp->payload);
-      switch (adp->tag) {
-      case TyA_Bounds: {
-         TyBounds* bounds = (TyBounds*)adp->payload;
-         if (bounds->knownL && bounds->knownU 
-             && bounds->knownL > bounds->knownU) goto baaad;
-         break;
-      }
-      case TyA_Atom: {
-         TyAtom* atom = (TyAtom*)adp->payload;
-         if (!atom->name) goto baaad;
-         break;
-      }
-      case TyA_Expr: {
-         D3Expr* expr = (D3Expr*)adp->payload;
-         if (!expr->bytes) goto baaad;
-         break;
-      }
-      case TyA_Field: {
-         TyField* field = (TyField*)adp->payload;
-         if (!field->name) goto baaad;
-         if ( (field->isStruct && (!field->loc)) 
-              || ((!field->isStruct) && field->loc))
-            goto baaad;
-         ok = resolve_binding( &payload, admin, field->typeR,
-                               TyA_Type, False/*!allow_invalid*/ );
-         if (!ok) goto baaad;
-         field->typeR = payload;
-         break;
-      }
-      case TyA_Type: {
-         UChar   enc;
-         XArray* xa;
-         Type* ty = (Type*)adp->payload;
-         switch (ty->tag) {
-            case Ty_Base:
-               enc = ty->Ty.Base.enc;
-               if ((!ty->Ty.Base.name) 
-                   || ty->Ty.Base.szB < 1 || ty->Ty.Base.szB > 32
-                   || (enc != 'S' && enc != 'U' && enc != 'F' && enc != 'C'))
-                  goto baaad;
-               break;
-            case Ty_TyDef:
-               if (!ty->Ty.TyDef.name) goto baaad;
-               ok = resolve_binding( &payload, admin,
-                                     ty->Ty.TyDef.typeR, 
-                                     TyA_Type,
-                                     True/*allow_invalid*/ );
-               if (!ok) goto baaad;
-               ty->Ty.TyDef.typeR = payload;
-               break;
-            case Ty_PorR:
-               if (ty->Ty.PorR.szB != sizeof(Word)) goto baaad;
-               ok = resolve_binding( &payload, admin,
-                                     ty->Ty.PorR.typeR, 
-                                     TyA_Type,
-                                     False/*!allow_invalid*/ );
-               if (!ok) goto baaad;
-               ty->Ty.PorR.typeR = payload;
-               break;
-            case Ty_Array:
-               if (!ty->Ty.Array.bounds) goto baaad;
-               ok = resolve_binding( &payload, admin,
-                                     ty->Ty.Array.typeR, 
-                                     TyA_Type,
-                                     False/*!allow_invalid*/ );
-               if (!ok) goto baaad;
-               ty->Ty.Array.typeR = payload;
-               break;
-            case Ty_Enum:
-               if ((!ty->Ty.Enum.atomRs)
-                   || ty->Ty.Enum.szB < 1 
-                   || ty->Ty.Enum.szB > 8) goto baaad;
-               xa = ty->Ty.Enum.atomRs;
-               break;
-            case Ty_StOrUn:
-               xa = ty->Ty.StOrUn.fields;
-               if (!xa) goto baaad;
-               break;
-            case Ty_Fn:
-               break;
-            case Ty_Qual:
-               if (ty->Ty.Qual.qual != 'C' 
-                   && ty->Ty.Qual.qual != 'V') goto baaad;
-               ok = resolve_binding( &payload, admin,
-                                     ty->Ty.Qual.typeR, 
-                                     TyA_Type,
-                                     False/*!allow_invalid*/ );
-               if (!ok) goto baaad;
-               ty->Ty.Qual.typeR = payload;
-               break;
-            case Ty_Void:
-               if (ty->Ty.Void.isFake != False 
-                   && ty->Ty.Void.isFake != True) goto baaad;
-               break;
-            default:
-               goto baaad;
-         }
-         break;
-      }
-      baaad:
-      default:
-         VG_(printf)("valgrind: bad D3TyAdmin: ");
-         ML_(pp_TyAdmin)(adp);
-         VG_(printf)("\n");
-      }
-   }
-
-   /* Now resolve the variables list */
+   Word i, n;
    n = VG_(sizeXA)( vars );
    for (i = 0; i < n; i++) {
       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
-      payload = NULL;
-      ok = resolve_binding( &payload, admin, var->typeR,
-                            TyA_Type, True/*allow_invalid*/ );
+      /* This is the stated type of the variable.  But it might be
+         an indirection, so be careful. */
+      TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
+                                                var->typeR );
+      if (ent && ent->tag == Te_INDIR) {
+         ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, 
+                                            ent->Te.INDIR.indR );
+         vg_assert(ent);
+         vg_assert(ent->tag != Te_INDIR);
+      }
 
-      if (0 && !ok)
-         VG_(printf)("Can't resolve type reference 0x%lx\n",
-                     (UWord)var->typeR);
-      //vg_assert(ok);
-      var->typeR = payload;
+      /* Deal first with "normal" cases */
+      if (ent && ML_(TyEnt__is_type)(ent)) {
+         var->typeR = ent->cuOff;
+         continue;
+      }
+
+      /* If there's no ent, we probably did not manage to read a type
+         at the cuOffset which is stated as being this variable's
+         type.  Maybe a deficiency in parse_type_DIE.  Complain. */
+      if (ent == NULL) {
+         VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
+         barf("resolve_variable_types: "
+              "cuOff does not refer to a known type");
+      }
+      vg_assert(ent);
+      /* If ent has any other tag, something bad happened, along the
+         lines of var->typeR not referring to a type at all. */
+      vg_assert(ent->tag == Te_UNKNOWN);
+      /* Just accept it; the type will be useless, but at least keep
+         going. */
+      var->typeR = ent->cuOff;
    }
 }
 
@@ -2802,12 +2976,15 @@
    return 0;
 }
 
-static void read_DIE ( /*MOD*/XArray* /* of TyAdmin */ admin,
-                       /*OUT*/XArray* /* of TempVar* */ tempvars,
-                       /*MOD*/XArray* /* of GExpr* */ gexprs,
-                       /*MOD*/D3TypeParser* typarser,
-                       /*MOD*/D3VarParser* varparser,
-                       Cursor* c, Bool td3, CUConst* cc, Int level )
+static void read_DIE ( 
+   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
+   /*MOD*/XArray* /* of TyEnt */ tyents,
+   /*MOD*/XArray* /* of TempVar* */ tempvars,
+   /*MOD*/XArray* /* of GExpr* */ gexprs,
+   /*MOD*/D3TypeParser* typarser,
+   /*MOD*/D3VarParser* varparser,
+   Cursor* c, Bool td3, CUConst* cc, Int level
+)
 {
    Cursor abbv;
    ULong  atag, abbv_code;
@@ -2863,7 +3040,7 @@
    set_position_of_Cursor( c,     start_die_c_offset );
    set_position_of_Cursor( &abbv, start_abbv_c_offset );
 
-   parse_type_DIE( admin,
+   parse_type_DIE( tyents,
                    typarser,
                    (DW_TAG)atag,
                    posn,
@@ -2876,7 +3053,8 @@
    set_position_of_Cursor( c,     start_die_c_offset );
    set_position_of_Cursor( &abbv, start_abbv_c_offset );
 
-   parse_var_DIE( tempvars,
+   parse_var_DIE( rangestree,
+                  tempvars,
                   gexprs,
                   varparser,
                   (DW_TAG)atag,
@@ -2896,7 +3074,8 @@
       while (True) {
          atag = peek_ULEB128( c );
          if (atag == 0) break;
-         read_DIE( admin, tempvars, gexprs, typarser, varparser,
+         read_DIE( rangestree, tyents, tempvars, gexprs,
+                   typarser, varparser,
                    c, td3, cc, level+1 );
       }
       /* Now we need to eat the terminating zero */
@@ -2911,8 +3090,7 @@
 static
 void new_dwarf3_reader_wrk ( 
    struct _DebugInfo* di,
-   __attribute__((noreturn))
-   void (*barf)( HChar* ),
+   __attribute__((noreturn)) void (*barf)( HChar* ),
    UChar* debug_info_img,   SizeT debug_info_sz,
    UChar* debug_abbv_img,   SizeT debug_abbv_sz,
    UChar* debug_line_img,   SizeT debug_line_sz,
@@ -2921,9 +3099,13 @@
    UChar* debug_loc_img,    SizeT debug_loc_sz
 )
 {
-   XArray* /* of TyAdmin */ admin;
-   XArray* /* of GExpr* */ gexprs;
-   XArray* /* of TempVar* */ tempvars;
+   XArray* /* of TyEnt */     tyents;
+   XArray* /* of TyEnt */     tyents_to_keep;
+   XArray* /* of GExpr* */    gexprs;
+   XArray* /* of TempVar* */  tempvars;
+   WordFM* /* of (XArray* of AddrRange, void) */ rangestree;
+   TyEntIndexCache* tyents_cache = NULL;
+   TyEntIndexCache* tyents_to_keep_cache = NULL;
    TempVar *varp, *varp2;
    GExpr* gexpr;
    Cursor abbv; /* for showing .debug_abbrev */
@@ -2938,7 +3120,6 @@
    XArray* /* of TempVar* */ dioff_lookup_tab;
    Bool text_biasing_borked;
    KludgeyTextBiaser ktb;
-
 #if 0
    /* This doesn't work properly because it assumes all entries are
       packed end to end, with no holes.  But that doesn't always
@@ -3085,35 +3266,57 @@
       huge and presumably will not occur in any valid DWARF3 file --
       it would need to have a .debug_info section 4GB long for that to
       happen.  These type entries end up in the DebugInfo. */
-   admin = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), sizeof(TyAdmin) );
-   { TyAdmin tyad;
-     Type* tVoid = ML_(new_Type)();
-     tVoid->tag = Ty_Void;
-     tVoid->Ty.Void.isFake = True;
-     VG_(memset)( &tyad, 0, sizeof(tyad) );
-     tyad.cuOff   = (UWord)D3_FAKEVOID_CUOFF;
-     tyad.payload = tVoid;
-     tyad.tag     = TyA_Type;
-     VG_(addToXA)( admin, &tyad );
+   tyents = VG_(newXA)( ML_(dinfo_zalloc), 
+                        "di.readdwarf3.ndrw.1 (TyEnt temp array)",
+                        ML_(dinfo_free), sizeof(TyEnt) );
+   { TyEnt tyent;
+     VG_(memset)(&tyent, 0, sizeof(tyent));
+     tyent.tag   = Te_TyVoid;
+     tyent.cuOff = D3_FAKEVOID_CUOFF;
+     tyent.Te.TyVoid.isFake = True;
+     VG_(addToXA)( tyents, &tyent );
    }
+   { TyEnt tyent;
+     VG_(memset)(&tyent, 0, sizeof(tyent));
+     tyent.tag   = Te_UNKNOWN;
+     tyent.cuOff = D3_INVALID_CUOFF;
+     VG_(addToXA)( tyents, &tyent );
+   }
+
+   /* This is a tree used to unique-ify the range lists that are
+      manufactured by parse_var_DIE.  References to the keys in the
+      tree wind up in .rngMany fields in TempVars.  We'll need to
+      delete this tree, and the XArrays attached to it, at the end of
+      this function. */
+   rangestree = VG_(newFM)( ML_(dinfo_zalloc),
+                            "di.readdwarf3.ndrw.2 (rangestree)",
+                            ML_(dinfo_free),
+                            (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
 
    /* List of variables we're accumulating.  These don't end up in the
       DebugInfo; instead their contents are handed to ML_(addVar) and
       the list elements are then deleted. */
-   tempvars = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 
+   tempvars = VG_(newXA)( ML_(dinfo_zalloc),
+                          "di.readdwarf3.ndrw.3 (TempVar*s array)",
+                          ML_(dinfo_free), 
                           sizeof(TempVar*) );
 
    /* List of GExprs we're accumulating.  These wind up in the
       DebugInfo. */
-   gexprs = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), sizeof(GExpr*) );
+   gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
+                        ML_(dinfo_free), sizeof(GExpr*) );
 
    /* We need a D3TypeParser to keep track of partially constructed
       types.  It'll be discarded as soon as we've completed the CU,
-      since the resulting information is tipped in to 'admin' as it is
-      generated. */
+      since the resulting information is tipped in to 'tyents' as it
+      is generated. */
    VG_(memset)( &typarser, 0, sizeof(typarser) );
    typarser.sp = -1;
    typarser.language = '?';
+   for (i = 0; i < N_D3_TYPE_STACK; i++) {
+      typarser.qparentE[i].tag   = Te_EMPTY;
+      typarser.qparentE[i].cuOff = D3_INVALID_CUOFF;
+   }
 
    VG_(memset)( &varparser, 0, sizeof(varparser) );
    varparser.sp = -1;
@@ -3143,7 +3346,8 @@
          vg_assert(varparser.level[i] == 0);
       }
       for (i = 0; i < N_D3_TYPE_STACK; i++) {
-         vg_assert(typarser.qparent[i] == NULL);
+         vg_assert(typarser.qparentE[i].cuOff == D3_INVALID_CUOFF);
+         vg_assert(typarser.qparentE[i].tag   == Te_EMPTY);
          vg_assert(typarser.qlevel[i] == 0);
       }
 
@@ -3187,13 +3391,16 @@
          etc. */
       vg_assert(!varparser.filenameTable );
       varparser.filenameTable 
-         = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5",
+                       ML_(dinfo_free),
                        sizeof(UChar*) );
-      vg_assert(varparser.filenameTable );
+      vg_assert(varparser.filenameTable);
 
       /* Now read the one-and-only top-level DIE for this CU. */
       vg_assert(varparser.sp == 0);
-      read_DIE( admin, tempvars, gexprs, &typarser, &varparser,
+      read_DIE( rangestree,
+                tyents, tempvars, gexprs,
+                &typarser, &varparser,
                 &info, td3, &cc, 0 );
 
       cu_offset_now = get_position_of_Cursor( &info );
@@ -3224,28 +3431,78 @@
 
    /* From here on we're post-processing the stuff we got
       out of the .debug_info section. */
-
    if (td3) {
       TRACE_D3("\n");
-      TRACE_D3("------ Acquired the following type entities: ------\n");
-      n = VG_(sizeXA)( admin );
-      for (i = 0; i < n; i++) {
-         TyAdmin* tyad = (TyAdmin*)VG_(indexXA)( admin, i );
-         TRACE_D3("   ");
-         ML_(pp_TyAdmin)( tyad );
-         TRACE_D3("\n");
-      }
+      ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
       TRACE_D3("\n");
-      TRACE_D3("------ Resolving type entries ------\n");
+      TRACE_D3("------ Compressing type entries ------\n");
    }
 
-   /* See "Comment_Regarding_DWARF3_Text_Biasing" above. */
+   tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
+                                     sizeof(TyEntIndexCache) );
+   ML_(TyEntIndexCache__invalidate)( tyents_cache );
+   dedup_types( td3, tyents, tyents_cache );
+   if (td3) {
+      TRACE_D3("\n");
+      ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
+   }
+
+   TRACE_D3("\n");
+   TRACE_D3("------ Resolving the types of variables ------\n" );
+   resolve_variable_types( barf, tyents, tyents_cache, tempvars );
+
+   /* Copy all the non-INDIR tyents into a new table.  For large
+      .so's, about 90% of the tyents will by now have been resolved to
+      INDIRs, and we no longer need them, and so don't need to store
+      them. */
+   tyents_to_keep
+      = VG_(newXA)( ML_(dinfo_zalloc), 
+                    "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
+                    ML_(dinfo_free), sizeof(TyEnt) );
+   n = VG_(sizeXA)( tyents );
+   for (i = 0; i < n; i++) {
+      TyEnt* ent = VG_(indexXA)( tyents, i );
+      if (ent->tag != Te_INDIR)
+         VG_(addToXA)( tyents_to_keep, ent );
+   }
+
+   VG_(deleteXA)( tyents );
+   tyents = NULL;
+   ML_(dinfo_free)( tyents_cache );
+   tyents_cache = NULL;
+
+   /* Sort tyents_to_keep so we can lookup in it.  A complete (if
+      minor) waste of time, since tyents itself is sorted, but
+      necessary since VG_(lookupXA) refuses to cooperate if we
+      don't. */
+   VG_(setCmpFnXA)(
+      tyents_to_keep,
+      (Int(*)(void*,void*)) ML_(TyEnt__cmp_by_cuOff_only)
+   );
+   VG_(sortXA)( tyents_to_keep );
+
+   /* Enable cacheing on tyents_to_keep */
+   tyents_to_keep_cache
+      = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
+                           sizeof(TyEntIndexCache) );
+   ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
+
+   /* And record the tyents in the DebugInfo.  We do this before
+      starting to hand variables to ML_(addVar), since if ML_(addVar)
+      wants to do debug printing (of the types of said vars) then it
+      will need the tyents.*/
+   vg_assert(!di->admin_tyents);
+   di->admin_tyents = tyents_to_keep;
+
+   /* Bias all the location expressions.  See
+      "Comment_Regarding_DWARF3_Text_Biasing" above. */
+   TRACE_D3("\n");
+   TRACE_D3("------ Biasing the location expressions ------\n" );
    VG_(memset)( &ktb, 0, sizeof(ktb ));
    ktb.rx_map_avma = di->rx_map_avma;
    ktb.rx_map_size = di->rx_map_size;
    ktb.text_bias   = di->text_bias;
 
-   resolve_type_entities( admin, tempvars );
    n = VG_(sizeXA)( gexprs );
    for (i = 0; i < n; i++) {
       gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
@@ -3261,7 +3518,8 @@
       ascending order, there is no need to sort the array after
       construction.  The ascendingness is however asserted for. */
    dioff_lookup_tab
-      = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 
+      = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
+                    ML_(dinfo_free), 
                     sizeof(TempVar*) );
    vg_assert(dioff_lookup_tab);
 
@@ -3285,6 +3543,7 @@
       each var and hand it to ML_(addVar). */
    n = VG_(sizeXA)( tempvars );
    for (j = 0; j < n; j++) {
+      TyEnt* ent;
       varp = *(TempVar**)VG_(indexXA)( tempvars, j );
 
       /* Possibly show .. */
@@ -3294,7 +3553,7 @@
                      varp->level,
                      varp->name ? varp->name : (UChar*)"<anon_var>" );
          if (varp->typeR) {
-            ML_(pp_Type_C_ishly)( varp->typeR );
+            ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
          } else {
             VG_(printf)("NULL");
          }
@@ -3368,11 +3627,20 @@
          varp->name = ML_(addStr)( di, "<anon_var>", -1 );
 
       /* So now does it have enough info to be useful? */
-      /* NOTE: re typeR: this is a hack.  If typeR is NULL then the
-         type didn't get resolved.  Really, in that case something's
-         broken earlier on, and should be fixed, rather than just
-         skipping the variable. */
-      if (!varp->typeR) continue;
+      /* NOTE: re typeR: this is a hack.  If typeR refers to a
+         Te_UNKNOWN entry then the type didn't get resolved.  Really,
+         in that case something's broken earlier on, and should be
+         fixed, rather than just skipping the variable. */
+      ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
+                                         tyents_to_keep_cache, 
+                                         varp->typeR );
+      /* The next two assertions should be guaranteed by 
+         our previous call to resolve_variable_types. */
+      vg_assert(ent);
+      vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
+
+      if (ent->tag == Te_UNKNOWN) continue;
+
       vg_assert(varp->gexpr);
       vg_assert(varp->name);
       vg_assert(varp->typeR);
@@ -3439,7 +3707,7 @@
            ML_(addVar)(
               di, varp->level, 
                   pcMin, pcMax,
-                  varp->name, (void*)varp->typeR,
+                  varp->name,  varp->typeR,
                   varp->gexpr, varp->fbGX,
                   varp->fName, varp->fLine, td3 
            );
@@ -3475,20 +3743,29 @@
    n = VG_(sizeXA)( tempvars );
    for (i = 0; i < n; i++) {
       varp = *(TempVar**)VG_(indexXA)( tempvars, i );
-      if (varp->rngMany)
-         VG_(deleteXA)(varp->rngMany);
       ML_(dinfo_free)(varp);
    }
    VG_(deleteXA)( tempvars );
    tempvars = NULL;
 
-   /* And get rid of the temporary mapping table. */
+   /* and the temp lookup table */
    VG_(deleteXA)( dioff_lookup_tab );
 
-   /* record the TyAdmins and the GExprs in di so they can be freed
-      later */
-   vg_assert(!di->admin_tyadmins);
-   di->admin_tyadmins = admin;
+   /* and the ranges tree.  Note that we need to also free the XArrays
+      which constitute the keys, hence pass VG_(deleteXA) as a
+      key-finalizer. */
+   VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
+
+   /* and the tyents_to_keep cache */
+   ML_(dinfo_free)( tyents_to_keep_cache );
+   tyents_to_keep_cache = NULL;
+
+   /* and the file name table (just the array, not the entries 
+      themselves). */
+   vg_assert(varparser.filenameTable);
+   VG_(deleteXA)( varparser.filenameTable );
+
+   /* record the GExprs in di so they can be freed later */
    vg_assert(!di->admin_gexprs);
    di->admin_gexprs = gexprs;
 }
diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c
index 04d17e5..ea7a2c1 100644
--- a/coregrind/m_debuginfo/readelf.c
+++ b/coregrind/m_debuginfo/readelf.c
@@ -639,7 +639,8 @@
 
    oset = VG_(OSetGen_Create)( offsetof(TempSym,key), 
                                (OSetCmp_t)cmp_TempSymKey, 
-                               ML_(dinfo_zalloc), ML_(dinfo_free) );
+                               ML_(dinfo_zalloc), "di.respl.1",
+                               ML_(dinfo_free) );
    vg_assert(oset);
 
    /* Perhaps should start at i = 1; ELF docs suggest that entry
@@ -905,7 +906,7 @@
                       Char* objpath, Char* debugname, 
                       UInt crc, /*OUT*/UWord* size )
 {
-   Char *objdir = ML_(dinfo_strdup)(objpath);
+   Char *objdir = ML_(dinfo_strdup)("di.fdf.1", objpath);
    Char *objdirptr;
    Char *debugpath;
    Addr addr = 0;
@@ -914,6 +915,7 @@
       *objdirptr = '\0';
 
    debugpath = ML_(dinfo_zalloc)(
+                  "di.fdf.2",
                   VG_(strlen)(objdir) + VG_(strlen)(debugname) + 32);
    
    VG_(sprintf)(debugpath, "%s/%s", objdir, debugname);
@@ -1239,7 +1241,7 @@
             }
             if (stroff != -1 && strtab != NULL) {
                TRACE_SYMTAB("Found soname = %s\n", strtab+stroff);
-               di->soname = ML_(dinfo_strdup)(strtab+stroff);
+               di->soname = ML_(dinfo_strdup)("di.redi.1", strtab+stroff);
             }
          }
       } /* for (i = 0; i < phdr_nent; i++) ... */
diff --git a/coregrind/m_debuginfo/readstabs.c b/coregrind/m_debuginfo/readstabs.c
index fb24ac9..20b8869 100644
--- a/coregrind/m_debuginfo/readstabs.c
+++ b/coregrind/m_debuginfo/readstabs.c
@@ -178,7 +178,7 @@
                   qbuflen = 16;
                while ((qidx + qlen) >= qbuflen)
                   qbuflen *= 2;
-               n = ML_(dinfo_zalloc)(qbuflen);
+               n = ML_(dinfo_zalloc)("di.readstabs.rds.1", qbuflen);
                VG_(memcpy)(n, qbuf, qidx);
                
                if (qbuf != NULL)
diff --git a/coregrind/m_debuginfo/readxcoff.c b/coregrind/m_debuginfo/readxcoff.c
index a21fe3c..e444213 100644
--- a/coregrind/m_debuginfo/readxcoff.c
+++ b/coregrind/m_debuginfo/readxcoff.c
@@ -569,8 +569,8 @@
       add the rest to 'syms'.
       ---------------------------------------------------------- */
 
-   syms = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free), 
-                      sizeof(XCoffSym) );
+   syms = VG_(newXA)( ML_(dinfo_zalloc), "di.readxcoff.rst.1", 
+                      ML_(dinfo_free), sizeof(XCoffSym) );
 
    if (SHOW && SHOW_SYMS_P1) {
       VG_(printf)("--- BEGIN Phase1 (find text symbol starts) ---\n");
@@ -2149,7 +2149,7 @@
    Int    i;
    SysRes sr, fd;
 
-   struct vki_stat stat_buf;
+   struct vg_stat stat_buf;
 
    vg_assert(o_name);
 
@@ -2460,14 +2460,14 @@
          if (di->memname) {
             /* set the soname to "archive.a(member.o)" */
             Int nbytes = VG_(strlen)(p) + 1 + VG_(strlen)(di->memname) + 1 + 1;
-            UChar* so = ML_(dinfo_zalloc)(nbytes);
+            UChar* so = ML_(dinfo_zalloc)("di.readxcoff.rxdi.1", nbytes);
             vg_assert(so);
             VG_(sprintf)(so, "%s(%s)", p, di->memname);
             vg_assert(VG_(strlen)(so) == nbytes-1);
             di->soname = so;
          } else {
             /* no member name, hence soname = "archive.a" */
-            di->soname = ML_(dinfo_strdup)(p);
+            di->soname = ML_(dinfo_strdup)("di.readxcoff.rxdi.2", p);
          }
       }
       if (SHOW)
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c
index 3fe0b3b..70a7a42 100644
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -182,7 +182,7 @@
    if (di->strchunks == NULL || 
        (di->strchunks->strtab_used 
         + space_needed) > SEGINFO_STRCHUNKSIZE) {
-      chunk = ML_(dinfo_zalloc)(sizeof(*chunk));
+      chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
       chunk->strtab_used = 0;
       chunk->next = di->strchunks;
       di->strchunks = chunk;
@@ -211,7 +211,8 @@
    if (di->symtab_used == di->symtab_size) {
       new_sz = 2 * di->symtab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = ML_(dinfo_zalloc)( new_sz * sizeof(DiSym) );
+      new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1", 
+                                   new_sz * sizeof(DiSym) );
       if (di->symtab != NULL) {
          for (i = 0; i < di->symtab_used; i++)
             new_tab[i] = di->symtab[i];
@@ -240,7 +241,8 @@
    if (di->loctab_used == di->loctab_size) {
       new_sz = 2 * di->loctab_size;
       if (new_sz == 0) new_sz = 500;
-      new_tab = ML_(dinfo_zalloc)( new_sz * sizeof(DiLoc) );
+      new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
+                                   new_sz * sizeof(DiLoc) );
       if (di->loctab != NULL) {
          for (i = 0; i < di->loctab_used; i++)
             new_tab[i] = di->loctab[i];
@@ -399,7 +401,8 @@
    if (di->cfsi_used == di->cfsi_size) {
       new_sz = 2 * di->cfsi_size;
       if (new_sz == 0) new_sz = 20;
-      new_tab = ML_(dinfo_zalloc)( new_sz * sizeof(DiCfSI) );
+      new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
+                                   new_sz * sizeof(DiCfSI) );
       if (di->cfsi != NULL) {
          for (i = 0; i < di->cfsi_used; i++)
             new_tab[i] = di->cfsi[i];
@@ -614,7 +617,7 @@
       vg_assert(nyu->aMin <= nyu->aMax);
       /* copy vars into it */
       vg_assert(first->vars);
-      nyu->vars = VG_(cloneXA)( first->vars );
+      nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
       vg_assert(nyu->vars);
       VG_(OSetGen_Insert)( scope, nyu );
       first = nyu;
@@ -644,7 +647,7 @@
       vg_assert(nyu->aMin <= nyu->aMax);
       /* copy vars into it */
       vg_assert(last->vars);
-      nyu->vars = VG_(cloneXA)( last->vars );
+      nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
       vg_assert(nyu->vars);
       VG_(OSetGen_Insert)( scope, nyu );
       last = nyu;
@@ -711,7 +714,7 @@
                   Addr   aMin,
                   Addr   aMax,
                   UChar* name, /* in di's .strchunks */
-                  Type*  type,
+                  UWord  typeR, /* a cuOff */
                   GExpr* gexpr,
                   GExpr* fbGX,
                   UChar* fileName, /* where decl'd - may be NULL.
@@ -722,11 +725,14 @@
    OSet* /* of DiAddrRange */ scope;
    DiVariable var;
    Bool       all;
+   TyEnt*     ent;
+
+   tl_assert(di && di->admin_tyents);
 
    if (0) {
       VG_(printf)("  ML_(addVar): level %d  %#lx-%#lx  %s :: ",
                   level, aMin, aMax, name );
-      ML_(pp_Type_C_ishly)( type );
+      ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
       VG_(printf)("\n  Var=");
       ML_(pp_GX)(gexpr);
       VG_(printf)("\n");
@@ -743,9 +749,12 @@
    vg_assert(level >= 0);
    vg_assert(aMin <= aMax);
    vg_assert(name);
-   vg_assert(type);
    vg_assert(gexpr);
 
+   ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
+   tl_assert(ent);
+   vg_assert(ML_(TyEnt__is_type)(ent));
+
    /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
       ----------------------------------------------------------------
       Ignore any variables whose aMin .. aMax (that is, range of text
@@ -781,7 +790,7 @@
       it.  We will never be able to actually relate a data address to
       a data object with zero size, so there's no point in storing
       info on it. */
-   if (ML_(sizeOfType)(type).b != True) {
+   if (ML_(sizeOfType)(di->admin_tyents, typeR).b != True) {
       static Int complaints = 10;
       if (VG_(clo_verbosity) >= 2 && complaints > 0) {
          VG_(message)(Vg_DebugMsg, 
@@ -794,7 +803,9 @@
    }
 
    if (!di->varinfo) {
-      di->varinfo = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+      di->varinfo = VG_(newXA)( ML_(dinfo_zalloc), 
+                                "di.storage.addVar.1",
+                                ML_(dinfo_free),
                                 sizeof(OSet*) );
    }
 
@@ -804,7 +815,8 @@
       DiAddrRange* nyu;
       scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin), 
                                    ML_(cmp_for_DiAddrRange_range),
-                                   ML_(dinfo_zalloc), ML_(dinfo_free) );
+                                   ML_(dinfo_zalloc), "di.storage.addVar.2",
+                                   ML_(dinfo_free) );
       vg_assert(scope);
       if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
                          scope, VG_(sizeXA)(di->varinfo));
@@ -818,7 +830,8 @@
       vg_assert(nyu);
       nyu->aMin = (Addr)0;
       nyu->aMax = ~(Addr)0;
-      nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+      nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
+                              ML_(dinfo_free),
                               sizeof(DiVariable) );
       vg_assert(nyu->vars);
       VG_(OSetGen_Insert)( scope, nyu );
@@ -829,7 +842,7 @@
    vg_assert(scope);
 
    var.name     = name;
-   var.type     = type;
+   var.typeR    = typeR;
    var.gexpr    = gexpr;
    var.fbGX     = fbGX;
    var.fileName = fileName;
diff --git a/coregrind/m_debuginfo/tytypes.c b/coregrind/m_debuginfo/tytypes.c
index 6036ffc..a7a6652 100644
--- a/coregrind/m_debuginfo/tytypes.c
+++ b/coregrind/m_debuginfo/tytypes.c
@@ -44,320 +44,566 @@
 #include "priv_tytypes.h"      /* self */
 
 
-TyAdmin* ML_(new_TyAdmin) ( UWord cuOff ) {
-   TyAdmin* admin = ML_(dinfo_zalloc)( sizeof(TyAdmin) );
-   admin->cuOff = cuOff;
-   return admin;
-}
-TyAtom* ML_(new_TyAtom) ( UChar* name, Long value ) {
-   TyAtom* atom = ML_(dinfo_zalloc)( sizeof(TyAtom) );
-   atom->name  = name;
-   atom->value = value;
-   return atom;
-}
-TyField* ML_(new_TyField) ( UChar* name,
-                            Type* typeR, D3Expr* loc ) {
-   TyField* field = ML_(dinfo_zalloc)( sizeof(TyField) );
-   field->name  = name;
-   field->typeR = typeR;
-   field->loc   = loc;
-   return field;
-}
-TyBounds* ML_(new_TyBounds) ( void ) {
-   TyBounds* bounds = ML_(dinfo_zalloc)( sizeof(TyBounds) );
-   bounds->magic = TyBounds_MAGIC;
-   return bounds;
-}
-D3Expr* ML_(new_D3Expr) ( UChar* bytes, UWord nbytes ) {
-   D3Expr* expr = ML_(dinfo_zalloc)( sizeof(D3Expr) );
-   expr->bytes = bytes;
-   expr->nbytes = nbytes;
-   return expr;
-}
-Type* ML_(new_Type) ( void ) {
-   Type* type = ML_(dinfo_zalloc)( sizeof(Type) );
-   return type;
+/* Does this TyEnt denote a type, as opposed to some other kind of
+   thing? */
+
+Bool ML_(TyEnt__is_type)( TyEnt* te )
+{
+   /* Type-denoting tags come first; all other tags are non-types. */
+   switch (te->tag) {
+      case Te_TyBase:   case Te_TyPorR: case Te_TyTyDef:
+      case Te_TyStOrUn: case Te_TyEnum: case Te_TyArray:
+      case Te_TyFn:     case Te_TyQual: case Te_TyVoid:
+         return True;
+      case Te_EMPTY: case Te_INDIR: case Te_UNKNOWN:
+      case Te_Atom:  case Te_Field: case Te_Bound:
+         return False;
+      default: vg_assert(0); /* a tag not listed above */
+   }
 }
 
-static void delete_TyAtom ( TyAtom* atom ) {
-   /* .name is in DebugInfo.strchunks */
-   ML_(dinfo_free)(atom);
+
+/* Print a TyEnt, debug-style. */
+
+static void pp_XArray_of_cuOffs ( XArray* xa )
+{
+   /* Emit "{off,off,...}"; a comma precedes every element but the first. */
+   Word ix, nOffs = VG_(sizeXA)(xa);
+   VG_(printf)("{");
+   for (ix = 0; ix < nOffs; ix++) {
+      if (ix > 0)
+         VG_(printf)(",");
+      VG_(printf)("0x%05lx", *(UWord*)VG_(indexXA)(xa, ix));
+   }
+   VG_(printf)("}");
 }
-static void delete_TyField ( TyField* field ) {
-   /* .name is in DebugInfo.strchunks */
-   /* typeR and loc will be on the admin list; no need to free */
-   ML_(dinfo_free)(field);
-}
-static void delete_TyBounds ( TyBounds* bounds ) {
-   ML_(dinfo_free)(bounds);
-}
-static void delete_D3Expr ( D3Expr* expr ) {
-   /* .bytes is in DebugInfo.strchunks */
-   ML_(dinfo_free)(expr);
-}
-static void delete_Type ( Type* ty ) {
-   switch (ty->tag) {
-      case Ty_Base:
-         /* .name is in DebugInfo.strchunks */
+
+void ML_(pp_TyEnt)( TyEnt* te )
+{
+   VG_(printf)("0x%05lx  ", te->cuOff);
+   switch (te->tag) {
+      case Te_EMPTY:
+         VG_(printf)("EMPTY");
          break;
-      case Ty_PorR:
-         /* typeR will be on the admin list */
+      case Te_INDIR:
+         VG_(printf)("INDIR(0x%05lx)", te->Te.INDIR.indR);
          break;
-      case Ty_TyDef:
-         /* .name is in DebugInfo.strchunks */
-         /* typeR will be on the admin list */
+      case Te_UNKNOWN:
+         VG_(printf)("UNKNOWN");
          break;
-      case Ty_StOrUn:
-         /* .name is in DebugInfo.strchunks */
-         /* Just dump the containing XArray.  The fields themselves
-            will be on the admin list. */
-         if (ty->Ty.StOrUn.fields)
-            VG_(deleteXA)(ty->Ty.StOrUn.fields);
+      case Te_Atom:
+         VG_(printf)("Te_Atom(%lld,\"%s\")",
+                     te->Te.Atom.value, te->Te.Atom.name);
          break;
-      case Ty_Enum:
-         /* .name is in DebugInfo.strchunks */
-         if (ty->Ty.Enum.atomRs)
-            VG_(deleteXA)( ty->Ty.Enum.atomRs);
-         /* Just dump the containing XArray.  The atoms themselves
-            will be on the admin list. */
+      case Te_Field:
+         VG_(printf)("Te_Field(ty=0x%05lx,nLoc=%lu,loc=%p,\"%s\")",
+                     te->Te.Field.typeR, te->Te.Field.nLoc,
+                     te->Te.Field.loc,
+                     te->Te.Field.name ? te->Te.Field.name : (UChar*)"");
          break;
-      case Ty_Array:
-         if (ty->Ty.Array.bounds)
-            VG_(deleteXA)( ty->Ty.Array.bounds);
-         /* Just dump the containing XArray.  The bounds themselves
-            will be on the admin list. */
+      case Te_Bound:
+         VG_(printf)("Te_Bound[");
+         if (te->Te.Bound.knownL)
+            VG_(printf)("%lld", te->Te.Bound.boundL);
+         else
+            VG_(printf)("??");
+         VG_(printf)(",");
+         if (te->Te.Bound.knownU)
+            VG_(printf)("%lld", te->Te.Bound.boundU);
+         else
+            VG_(printf)("??");
+         VG_(printf)("]");
          break;
-      case Ty_Fn:
+      case Te_TyBase:
+         VG_(printf)("Te_TyBase(%d,%c,\"%s\")",
+                     te->Te.TyBase.szB, te->Te.TyBase.enc,
+                     te->Te.TyBase.name ? te->Te.TyBase.name
+                                        : (UChar*)"(null)" );
          break;
-      case Ty_Qual:
-         /* typeR will be on the admin list */
+      case Te_TyPorR:
+         VG_(printf)("Te_TyPorR(%d,%c,0x%05lx)",
+                     te->Te.TyPorR.szB,
+                     te->Te.TyPorR.isPtr ? 'P' : 'R',
+                     te->Te.TyPorR.typeR);
          break;
-      case Ty_Void:
+      case Te_TyTyDef:
+         VG_(printf)("Te_TyTyDef(0x%05lx,\"%s\")",
+                     te->Te.TyTyDef.typeR,
+                     te->Te.TyTyDef.name ? te->Te.TyTyDef.name
+                                         : (UChar*)"" );
+         break;
+      case Te_TyStOrUn:
+         if (te->Te.TyStOrUn.complete) {
+            VG_(printf)("Te_TyStOrUn(%ld,%c,%p,\"%s\")",
+                        te->Te.TyStOrUn.szB, 
+                        te->Te.TyStOrUn.isStruct ? 'S' : 'U',
+                        te->Te.TyStOrUn.fieldRs,
+                        te->Te.TyStOrUn.name ? te->Te.TyStOrUn.name
+                                             : (UChar*)"" );
+            if (te->Te.TyStOrUn.fieldRs)
+               pp_XArray_of_cuOffs( te->Te.TyStOrUn.fieldRs );
+         } else {
+            VG_(printf)("Te_TyStOrUn(INCOMPLETE,\"%s\")",
+                        te->Te.TyStOrUn.name);
+         }
+         break;
+      case Te_TyEnum:
+         VG_(printf)("Te_TyEnum(%d,%p,\"%s\")",
+                     te->Te.TyEnum.szB, te->Te.TyEnum.atomRs,
+                     te->Te.TyEnum.name ? te->Te.TyEnum.name
+                                        : (UChar*)"" );
+         if (te->Te.TyEnum.atomRs)
+            pp_XArray_of_cuOffs( te->Te.TyEnum.atomRs );
+         break;
+      case Te_TyArray:
+         VG_(printf)("Te_TyArray(0x%05lx,%p)",
+                     te->Te.TyArray.typeR, te->Te.TyArray.boundRs);
+         if (te->Te.TyArray.boundRs)
+            pp_XArray_of_cuOffs( te->Te.TyArray.boundRs );
+         break;
+      case Te_TyFn:
+         VG_(printf)("Te_TyFn");
+         break;
+      case Te_TyQual:
+         VG_(printf)("Te_TyQual(%c,0x%05lx)", te->Te.TyQual.qual,
+                     te->Te.TyQual.typeR);
+         break;
+      case Te_TyVoid:
+         VG_(printf)("Te_TyVoid%s",
+                     te->Te.TyVoid.isFake ? "(fake)" : "");
          break;
       default:
          vg_assert(0);
    }
 }
 
-void ML_(delete_payload_of_TyAdmin) ( TyAdmin* ad ) {
-   vg_assert(ad);
-   vg_assert(ad->payload);
-   switch (ad->tag) {
-      case TyA_Type:   delete_Type(ad->payload);     break;
-      case TyA_Atom:   delete_TyAtom(ad->payload);   break;
-      case TyA_Expr:   delete_D3Expr(ad->payload);   break;
-      case TyA_Field:  delete_TyField(ad->payload);  break;
-      case TyA_Bounds: delete_TyBounds(ad->payload); break;
-      default:         vg_assert(0);
+
+/* Print a whole XArray of TyEnts, debug-style */
+
+void ML_(pp_TyEnts)( XArray* tyents, HChar* who )
+{
+   /* A banner carrying 'who', then one "[index]  entry" line per TyEnt. */
+   Word ix    = 0;
+   Word nEnts = VG_(sizeXA)( tyents );
+   VG_(printf)("------ %s ------\n", who);
+   for (; ix < nEnts; ix++) {
+      VG_(printf)("   [%5ld]  ", ix);
+      ML_(pp_TyEnt)( (TyEnt*)VG_(indexXA)( tyents, ix ) );
+      VG_(printf)("\n");
    }
 }
 
 
-static void pp_XArray_of_pointersOrRefs ( XArray* xa ) {
-   Word i;
-   VG_(printf)("{");
-   for (i = 0; i < VG_(sizeXA)(xa); i++) {
-      void* ptr = *(void**) VG_(indexXA)(xa, i);
-      VG_(printf)("0x%05lx", (unsigned long)(ptr));
-      if (i+1 < VG_(sizeXA)(xa))
-         VG_(printf)(",");
-   }
-   VG_(printf)("}");
-}
-void ML_(pp_TyAtom) ( TyAtom* atom ) {
-   VG_(printf)("TyAtom(%lld,\"%s\")", atom->value, atom->name);
-}
-void ML_(pp_D3Expr) ( D3Expr* expr ) {
-   VG_(printf)("D3Expr(%p,%lu)", expr->bytes, expr->nbytes);
-}
-void ML_(pp_TyField) ( TyField* field ) {
-   VG_(printf)("TyField(0x%05lx,%p,\"%s\")",
-               (unsigned long)(field->typeR), field->loc,
-               field->name ? field->name : (UChar*)"");
-}
-void ML_(pp_TyBounds) ( TyBounds* bounds ) {
-   vg_assert(bounds->magic == TyBounds_MAGIC);
-   VG_(printf)("TyBounds[");
-   if (bounds->knownL)
-      VG_(printf)("%lld", bounds->boundL);
-   else
-      VG_(printf)("??");
-   VG_(printf)(",");
-   if (bounds->knownU)
-      VG_(printf)("%lld", bounds->boundU);
-   else
-      VG_(printf)("??");
-   VG_(printf)("]");
-}
+/* Print a TyEnt, C style, chasing stuff as necessary. */
 
-static void pp_TyBounds_C_ishly ( TyBounds* bounds ) {
-   vg_assert(bounds->magic == TyBounds_MAGIC);
-   if (bounds->knownL && bounds->knownU && bounds->boundL == 0) {
-      VG_(printf)("[%lld]", 1 + bounds->boundU);
+static void pp_TyBound_C_ishly ( XArray* tyents, UWord cuOff )
+{
+   TyEnt* ent = ML_(TyEnts__index_by_cuOff)( tyents, NULL, cuOff );
+   if (!ent) {
+      VG_(printf)("**bounds-have-invalid-cuOff**");
+      return;
+   }
+   vg_assert(ent->tag == Te_Bound);
+   if (ent->Te.Bound.knownL && ent->Te.Bound.knownU
+       && ent->Te.Bound.boundL == 0) {
+      VG_(printf)("[%lld]", 1 + ent->Te.Bound.boundU);
    }
    else
-   if (bounds->knownL && (!bounds->knownU) && bounds->boundL == 0) {
+   if (ent->Te.Bound.knownL && (!ent->Te.Bound.knownU) 
+       && ent->Te.Bound.boundL == 0) {
       VG_(printf)("[]");
    }
    else
-      ML_(pp_TyBounds)( bounds );
+      ML_(pp_TyEnt)( ent );
 }
 
-
-void ML_(pp_Type) ( Type* ty )
+void ML_(pp_TyEnt_C_ishly)( XArray* /* of TyEnt */ tyents,
+                            UWord cuOff )
 {
-   if (!ty) {
-      VG_(printf)("**type=NULL**");
+   TyEnt* ent = ML_(TyEnts__index_by_cuOff)( tyents, NULL, cuOff );
+   if (!ent) {
+      VG_(printf)("**type-has-invalid-cuOff**");
       return;
    }
-   switch (ty->tag) {
-      case Ty_Base:
-         VG_(printf)("Ty_Base(%d,%c,\"%s\")",
-                     ty->Ty.Base.szB, ty->Ty.Base.enc,
-                     ty->Ty.Base.name ? ty->Ty.Base.name
-                                        : (UChar*)"(null)" );
+   switch (ent->tag) {
+      case Te_TyBase:
+         if (!ent->Te.TyBase.name) goto unhandled;
+         VG_(printf)("%s", ent->Te.TyBase.name);
          break;
-      case Ty_PorR:
-         VG_(printf)("Ty_PorR(%d,%c,0x%05lx)",
-                     ty->Ty.PorR.szB,
-                     ty->Ty.PorR.isPtr ? 'P' : 'R',
-                     (unsigned long)(ty->Ty.PorR.typeR));
+      case Te_TyPorR:
+         ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyPorR.typeR);
+         VG_(printf)("%s", ent->Te.TyPorR.isPtr ? "*" : "&");
          break;
-      case Ty_Enum:
-         VG_(printf)("Ty_Enum(%d,%p,\"%s\")",
-                     ty->Ty.Enum.szB, ty->Ty.Enum.atomRs,
-                     ty->Ty.Enum.name ? ty->Ty.Enum.name
-                                        : (UChar*)"" );
-         if (ty->Ty.Enum.atomRs)
-            pp_XArray_of_pointersOrRefs( ty->Ty.Enum.atomRs );
+      case Te_TyEnum:
+         if (!ent->Te.TyEnum.name) goto unhandled;
+         VG_(printf)("enum %s", ent->Te.TyEnum.name);
          break;
-      case Ty_StOrUn:
-         if (ty->Ty.StOrUn.complete) {
-            VG_(printf)("Ty_StOrUn(%ld,%c,%p,\"%s\")",
-                        ty->Ty.StOrUn.szB, 
-                        ty->Ty.StOrUn.isStruct ? 'S' : 'U',
-                        ty->Ty.StOrUn.fields,
-                        ty->Ty.StOrUn.name ? ty->Ty.StOrUn.name
-                                             : (UChar*)"" );
-            if (ty->Ty.StOrUn.fields)
-               pp_XArray_of_pointersOrRefs( ty->Ty.StOrUn.fields );
-         } else {
-            VG_(printf)("Ty_StOrUn(INCOMPLETE,\"%s\")",
-                        ty->Ty.StOrUn.name);
-         }
-         break;
-      case Ty_Array:
-         VG_(printf)("Ty_Array(0x%05lx,%p)",
-                     (unsigned long)(ty->Ty.Array.typeR), ty->Ty.Array.bounds);
-         if (ty->Ty.Array.bounds)
-            pp_XArray_of_pointersOrRefs( ty->Ty.Array.bounds );
-         break;
-      case Ty_TyDef:
-         VG_(printf)("Ty_TyDef(0x%05lx,\"%s\")",
-                     (unsigned long)(ty->Ty.TyDef.typeR),
-                     ty->Ty.TyDef.name ? ty->Ty.TyDef.name
-                                         : (UChar*)"" );
-         break;
-      case Ty_Fn:
-         VG_(printf)("Ty_Fn");
-         break;
-      case Ty_Qual:
-         VG_(printf)("Ty_Qual(%c,0x%05lx)", ty->Ty.Qual.qual,
-                     (unsigned long)(ty->Ty.Qual.typeR));
-         break;
-      case Ty_Void:
-         VG_(printf)("Ty_Void%s",
-                     ty->Ty.Void.isFake ? "(fake)" : "");
-         break;
-      default: VG_(printf)("pp_Type:???");
-         break;
-   }
-}
-void ML_(pp_TyAdmin) ( TyAdmin* admin ) {
-   if (admin->cuOff != -1UL) {
-      VG_(printf)("<%05lx,%p> ", admin->cuOff, admin->payload);
-   } else {
-      VG_(printf)("<ff..f,%p> ", admin->payload);
-   }
-   switch (admin->tag) {
-      case TyA_Type:   ML_(pp_Type)(admin->payload);     break;
-      case TyA_Atom:   ML_(pp_TyAtom)(admin->payload);   break;
-      case TyA_Expr:   ML_(pp_D3Expr)(admin->payload);   break;
-      case TyA_Field:  ML_(pp_TyField)(admin->payload);  break;
-      case TyA_Bounds: ML_(pp_TyBounds)(admin->payload); break;
-      default:         VG_(printf)("pp_TyAdmin:???");    break;
-   }
-}
-
-/* NOTE: this assumes that the types have all been 'resolved' (that
-   is, inter-type references expressed as .debug_info offsets have
-   been converted into pointers) */
-void ML_(pp_Type_C_ishly) ( Type* ty )
-{
-   if (!ty) {
-      VG_(printf)("**type=NULL**");
-      return;
-   }
-   switch (ty->tag) {
-      case Ty_Base:
-         if (!ty->Ty.Base.name) goto unhandled;
-         VG_(printf)("%s", ty->Ty.Base.name);
-         break;
-      case Ty_PorR:
-         ML_(pp_Type_C_ishly)(ty->Ty.PorR.typeR);
-         VG_(printf)("%s", ty->Ty.PorR.isPtr ? "*" : "&");
-         break;
-      case Ty_Enum:
-         if (!ty->Ty.Enum.name) goto unhandled;
-         VG_(printf)("enum %s", ty->Ty.Enum.name);
-         break;
-      case Ty_StOrUn:
-         if (!ty->Ty.StOrUn.name) goto unhandled;
+      case Te_TyStOrUn:
+         if (!ent->Te.TyStOrUn.name) goto unhandled;
          VG_(printf)("%s %s",
-                     ty->Ty.StOrUn.isStruct ? "struct" : "union",
-                     ty->Ty.StOrUn.name);
+                     ent->Te.TyStOrUn.isStruct ? "struct" : "union",
+                     ent->Te.TyStOrUn.name);
          break;
-      case Ty_Array:
-         ML_(pp_Type_C_ishly)(ty->Ty.Array.typeR);
-         if (ty->Ty.Array.bounds) {
+      case Te_TyArray:
+         ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyArray.typeR);
+         if (ent->Te.TyArray.boundRs) {
             Word    w;
-            XArray* xa = ty->Ty.Array.bounds;
+            XArray* xa = ent->Te.TyArray.boundRs;
             for (w = 0; w < VG_(sizeXA)(xa); w++) {
-               pp_TyBounds_C_ishly( *(TyBounds**)VG_(indexXA)(xa, w) );
+               pp_TyBound_C_ishly( tyents, *(UWord*)VG_(indexXA)(xa, w) );
             }
          } else {
             VG_(printf)("%s", "[??]");
          }
          break;
-      case Ty_TyDef:
-         if (!ty->Ty.TyDef.name) goto unhandled;
-         VG_(printf)("%s", ty->Ty.TyDef.name);
+      case Te_TyTyDef:
+         if (!ent->Te.TyTyDef.name) goto unhandled;
+         VG_(printf)("%s", ent->Te.TyTyDef.name);
          break;
-      case Ty_Fn:
+      case Te_TyFn:
          VG_(printf)("%s", "<function_type>");
          break;
-      case Ty_Qual:
-         switch (ty->Ty.Qual.qual) {
+      case Te_TyQual:
+         switch (ent->Te.TyQual.qual) {
             case 'C': VG_(printf)("const "); break;
             case 'V': VG_(printf)("volatile "); break;
             default: goto unhandled;
          }
-         ML_(pp_Type_C_ishly)(ty->Ty.Qual.typeR);
+         ML_(pp_TyEnt_C_ishly)(tyents, ent->Te.TyQual.typeR);
          break;
-      case Ty_Void:
+      case Te_TyVoid:
          VG_(printf)("%svoid",
-                     ty->Ty.Void.isFake ? "fake" : "");
+                     ent->Te.TyVoid.isFake ? "fake" : "");
          break;
-      default: VG_(printf)("pp_Type_C_ishly:???");
-         break;
+      default:
+         goto unhandled;
    }
    return;
 
   unhandled:
-   ML_(pp_Type)(ty);
+   VG_(printf)("pp_TyEnt_C_ishly:unhandled: ");
+   ML_(pp_TyEnt)(ent);
+   vg_assert(0);
 }
 
 
+/* Lookup of TyEnts by .cuOff, and the index cache that speeds it up.
+   For ML_(TyEnts__index_by_cuOff): 'ents' is an XArray of TyEnts,
+   sorted by their .cuOff fields.  It returns the entry with the given
+   .cuOff, or NULL if none; it asserts if more than one entry has it. */
+
+void ML_(TyEntIndexCache__invalidate) ( TyEntIndexCache* cache )
+{
+   /* A NULL .ent marks a way as invalid; zeroing .cuOff is cosmetic. */
+   Word slot = N_TYENT_INDEX_CACHE;
+   while (slot-- > 0) {
+      cache->ce[slot].ent0   = cache->ce[slot].ent1   = NULL;
+      cache->ce[slot].cuOff0 = cache->ce[slot].cuOff1 = 0;
+   }
+   /* every way of every set is now invalid */
+}
+
+TyEnt* ML_(TyEnts__index_by_cuOff) ( XArray* /* of TyEnt */ ents,
+                                     TyEntIndexCache* cache,
+                                     UWord cuOff_to_find )
+{
+   /* Crude hit-rate statistics, aggregated over all caches. */
+   static UWord cacheQs   = 0 - 1;
+   static UWord cacheHits = 0;
+
+   UWord  h = 0;  /* cache set index; assigned whenever cache != NULL */
+   Word   lo, hi;
+   TyEnt  probe;
+   TyEnt* res;
+
+   /* Disabled diagnostic: change the leading 0 to 1 to print the
+      query and miss counts whenever the low 16 bits of cacheQs are 0. */
+   if (0 && 0 == (cacheQs & 0xFFFF))
+      VG_(printf)("cache: %'lu queries, %'lu misses\n",
+                  cacheQs, cacheQs - cacheHits);
+
+   /* Fast path: probe both ways of the selected set.  A way counts as
+      valid only when its .ent is non-NULL. */
+   if (LIKELY(cache != NULL)) {
+      h = cuOff_to_find % (UWord)N_TYENT_INDEX_CACHE;
+      cacheQs++;
+      if (cache->ce[h].ent0 != NULL && cache->ce[h].cuOff0 == cuOff_to_find) {
+         /* Way 0 hit: it is already in the preferred slot. */
+         cacheHits++;
+         return cache->ce[h].ent0;
+      }
+      if (cache->ce[h].ent1 != NULL && cache->ce[h].cuOff1 == cuOff_to_find) {
+         /* Way 1 hit: exchange the two ways so that this entry sits
+            in way 0 from now on, then hand it back. */
+         TyEnt* hit = cache->ce[h].ent1;
+         cacheHits++;
+         cache->ce[h].cuOff1 = cache->ce[h].cuOff0;
+         cache->ce[h].ent1   = cache->ce[h].ent0;
+         cache->ce[h].cuOff0 = cuOff_to_find;
+         cache->ce[h].ent0   = hit;
+         return hit;
+      }
+   }
+
+   /* Slow path: search the array itself.  Presumably the array's
+      comparison function looks only at .cuOff; .tag is filled in so
+      that the probe is not left partly uninitialised. */
+   probe.cuOff = cuOff_to_find;
+   probe.tag   = Te_EMPTY;
+   if (!VG_(lookupXA)( ents, &probe, &lo, &hi )) {
+      /* no entry has this .cuOff */
+      return NULL;
+   }
+
+   /* A match range wider than one element means the array holds
+      several entries with this .cuOff, which makes it invalid. */
+   vg_assert(lo == hi);
+   res = (TyEnt*)VG_(indexXA)( ents, lo );
+
+   /* Record the result: demote way 0 to way 1 and install 'res' as
+      the new way 0.  Since a NULL .ent denotes an invalid entry, a
+      NULL result cannot be cached. */
+   if (LIKELY(cache != NULL) && LIKELY(res != NULL)) {
+      cache->ce[h].cuOff1 = cache->ce[h].cuOff0;
+      cache->ce[h].ent1   = cache->ce[h].ent0;
+      cache->ce[h].cuOff0 = cuOff_to_find;
+      cache->ce[h].ent0   = res;
+   }
+
+   return res;
+}
+
+
+/* Generates a total ordering on TyEnts based only on their .cuOff
+   fields. */
+
+Word ML_(TyEnt__cmp_by_cuOff_only) ( TyEnt* te1, TyEnt* te2 )
+{
+   /* Three-way compare on .cuOff alone; no other field is consulted. */
+   if (te1->cuOff == te2->cuOff) return 0;
+   return (te1->cuOff < te2->cuOff) ? -1 : 1;
+}
+
+
+/* ML_(TyEnt__cmp_by_all_except_cuOff) orders TyEnts by everything
+   except their .cuOff fields.  Its per-type compare helpers come first. */
+static inline __attribute__((always_inline)) Word UWord__cmp ( UWord a, UWord b ) {
+   /* Three-way compare; 'inline' is what always_inline applies to. */
+   if (a == b) return 0;
+   return a < b ? -1 : 1;
+}
+static inline __attribute__((always_inline)) Word Long__cmp ( Long a, Long b ) {
+   /* Three-way compare; 'inline' is what always_inline applies to. */
+   if (a == b) return 0;
+   return a < b ? -1 : 1;
+}
+static inline __attribute__((always_inline)) Word Bool__cmp ( Bool a, Bool b ) {
+   /* Three-way compare; asserts that both operands are 0 or 1 first.
+      'inline' is what the always_inline attribute applies to. */
+   vg_assert( ((UWord)a) <= 1 );
+   vg_assert( ((UWord)b) <= 1 );
+   return (a == b) ? 0 : (a < b ? -1 : 1);
+}
+static inline __attribute__((always_inline)) Word UChar__cmp ( UChar a, UChar b ) {
+   /* Three-way compare; 'inline' is what always_inline applies to. */
+   if (a == b) return 0;
+   return a < b ? -1 : 1;
+}
+static inline __attribute__((always_inline)) Word Int__cmp ( Int a, Int b ) {
+   /* Three-way compare; 'inline' is what always_inline applies to. */
+   if (a == b) return 0;
+   return a < b ? -1 : 1;
+}
+static Word XArray_of_UWord__cmp ( XArray* a, XArray* b ) {
+   /* Order by length first, then by the first differing element. */
+   Word ix;
+   Word nA = VG_(sizeXA)( a );
+   Word nB = VG_(sizeXA)( b );
+   if (nA != nB) return nA < nB ? -1 : 1;
+   for (ix = 0; ix < nA; ix++) {
+      UWord ea = *(UWord*)VG_(indexXA)( a, ix );
+      UWord eb = *(UWord*)VG_(indexXA)( b, ix );
+      if (ea != eb) return ea < eb ? -1 : 1;
+   }
+   return 0;
+}
+static Word Bytevector__cmp ( UChar* a, UChar* b, Word n ) {
+   /* Bytewise three-way compare of the first n bytes of a and b. */
+   Word ix;
+   vg_assert(n >= 0);
+   for (ix = 0; ix < n; ix++) {
+      if (a[ix] != b[ix]) return a[ix] < b[ix] ? -1 : 1;
+   }
+   return 0;
+}
+static Word Asciiz__cmp ( UChar* a, UChar* b ) {
+   /* NULL-tolerant VG_(strcmp): NULL equals NULL and orders before any
+      non-NULL string. */
+   if (a == NULL || b == NULL)
+      return (a == b) ? 0 : (a == NULL ? -1 : 1);
+   return VG_(strcmp)(a, b);
+}
+
+Word ML_(TyEnt__cmp_by_all_except_cuOff) ( TyEnt* te1, TyEnt* te2 )
+{
+   /* Three-way structural compare of two TyEnts that ignores .cuOff.
+      Entries order by tag first; entries with the same tag are then
+      compared field by field, the first differing field deciding.
+      References to other entries (indR, typeR and the XArrays of
+      cuOffs) are compared as plain numbers; they are not followed. */
+   Word r;
+   if (te1->tag < te2->tag) return -1;
+   if (te1->tag > te2->tag) return 1;
+   switch (te1->tag) {
+   /* Te_EMPTY and Te_UNKNOWN have no fields to compare, so equal tags
+      mean equal entries.  Te_UNKNOWN must be listed: without it, a
+      pair of such entries would reach the assertion in the default. */
+   case Te_EMPTY: case Te_UNKNOWN:
+      return 0;
+   case Te_INDIR:
+      return UWord__cmp(te1->Te.INDIR.indR, te2->Te.INDIR.indR);
+   case Te_Atom:
+      r = Long__cmp(te1->Te.Atom.value, te2->Te.Atom.value);
+      if (r != 0) return r;
+      return Asciiz__cmp(te1->Te.Atom.name, te2->Te.Atom.name);
+   case Te_Field:
+      r = Bool__cmp(te1->Te.Field.isStruct, te2->Te.Field.isStruct);
+      if (r != 0) return r;
+      r = UWord__cmp(te1->Te.Field.typeR, te2->Te.Field.typeR);
+      if (r != 0) return r;
+      r = Asciiz__cmp(te1->Te.Field.name, te2->Te.Field.name);
+      if (r != 0) return r;
+      r = UWord__cmp(te1->Te.Field.nLoc, te2->Te.Field.nLoc);
+      if (r != 0) return r;
+      /* both .nLoc values are equal here, so either gives the length */
+      return Bytevector__cmp(te1->Te.Field.loc, te2->Te.Field.loc,
+                             te1->Te.Field.nLoc);
+   case Te_Bound:
+      /* the two known-flags first, then the bound values */
+      r = Bool__cmp(te1->Te.Bound.knownL, te2->Te.Bound.knownL);
+      if (r != 0) return r;
+      r = Bool__cmp(te1->Te.Bound.knownU, te2->Te.Bound.knownU);
+      if (r != 0) return r;
+      r = Long__cmp(te1->Te.Bound.boundL, te2->Te.Bound.boundL);
+      if (r != 0) return r;
+      return Long__cmp(te1->Te.Bound.boundU, te2->Te.Bound.boundU);
+   case Te_TyBase:
+      r = UChar__cmp(te1->Te.TyBase.enc, te2->Te.TyBase.enc);
+      if (r != 0) return r;
+      r = Int__cmp(te1->Te.TyBase.szB, te2->Te.TyBase.szB);
+      if (r != 0) return r;
+      return Asciiz__cmp(te1->Te.TyBase.name, te2->Te.TyBase.name);
+   case Te_TyPorR:
+      r = Int__cmp(te1->Te.TyPorR.szB, te2->Te.TyPorR.szB);
+      if (r != 0) return r;
+      r = UWord__cmp(te1->Te.TyPorR.typeR, te2->Te.TyPorR.typeR);
+      if (r != 0) return r;
+      return Bool__cmp(te1->Te.TyPorR.isPtr, te2->Te.TyPorR.isPtr);
+   case Te_TyTyDef:
+      r = UWord__cmp(te1->Te.TyTyDef.typeR, te2->Te.TyTyDef.typeR);
+      if (r != 0) return r;
+      return Asciiz__cmp(te1->Te.TyTyDef.name, te2->Te.TyTyDef.name);
+   case Te_TyStOrUn:
+      r = Bool__cmp(te1->Te.TyStOrUn.isStruct, te2->Te.TyStOrUn.isStruct);
+      if (r != 0) return r;
+      r = Bool__cmp(te1->Te.TyStOrUn.complete, te2->Te.TyStOrUn.complete);
+      if (r != 0) return r;
+      r = UWord__cmp(te1->Te.TyStOrUn.szB, te2->Te.TyStOrUn.szB);
+      if (r != 0) return r;
+      r = Asciiz__cmp(te1->Te.TyStOrUn.name, te2->Te.TyStOrUn.name);
+      if (r != 0) return r;
+      return XArray_of_UWord__cmp(te1->Te.TyStOrUn.fieldRs,
+                                  te2->Te.TyStOrUn.fieldRs);
+   case Te_TyEnum:
+      r = Int__cmp(te1->Te.TyEnum.szB, te2->Te.TyEnum.szB);
+      if (r != 0) return r;
+      r = Asciiz__cmp(te1->Te.TyEnum.name, te2->Te.TyEnum.name);
+      if (r != 0) return r;
+      return XArray_of_UWord__cmp(te1->Te.TyEnum.atomRs, te2->Te.TyEnum.atomRs);
+   case Te_TyArray:
+      r = UWord__cmp(te1->Te.TyArray.typeR, te2->Te.TyArray.typeR);
+      if (r != 0) return r;
+      return XArray_of_UWord__cmp(te1->Te.TyArray.boundRs,
+                                  te2->Te.TyArray.boundRs);
+   case Te_TyFn:
+      /* no fields are compared for function types */
+      return 0;
+   case Te_TyQual:
+      r = UWord__cmp(te1->Te.TyQual.typeR, te2->Te.TyQual.typeR);
+      if (r != 0) return r;
+      return UChar__cmp(te1->Te.TyQual.qual, te2->Te.TyQual.qual);
+   case Te_TyVoid:
+      return Bool__cmp(te1->Te.TyVoid.isFake, te2->Te.TyVoid.isFake);
+   default:
+      /* a tag this function does not know how to compare */
+      vg_assert(0);
+   }
+}
+
+
+/* Free up all directly or indirectly heap-allocated stuff attached to
+   this TyEnt, and set its tag to Te_EMPTY.  The .cuOff field is
+   unchanged. */
+
+void ML_(TyEnt__make_EMPTY) ( TyEnt* te )
+{
+   const UWord keptOff = te->cuOff;  /* survives the wipe below */
+   /* First release whatever the entry owns on the heap; which fields
+      those are depends on the tag.  Each is freed only if non-NULL. */
+   switch (te->tag) {
+      /* Tags for which nothing is freed. */
+      case Te_EMPTY:  case Te_INDIR:  case Te_UNKNOWN:
+      case Te_Bound:  case Te_TyPorR: case Te_TyFn:
+      case Te_TyQual: case Te_TyVoid:
+         break;
+      case Te_Atom:
+         if (te->Te.Atom.name)
+            ML_(dinfo_free)(te->Te.Atom.name);
+         break;
+      case Te_Field:
+         if (te->Te.Field.name)
+            ML_(dinfo_free)(te->Te.Field.name);
+         if (te->Te.Field.loc)
+            ML_(dinfo_free)(te->Te.Field.loc);
+         break;
+      case Te_TyBase:
+         if (te->Te.TyBase.name)
+            ML_(dinfo_free)(te->Te.TyBase.name);
+         break;
+      case Te_TyTyDef:
+         if (te->Te.TyTyDef.name)
+            ML_(dinfo_free)(te->Te.TyTyDef.name);
+         break;
+      case Te_TyStOrUn:
+         if (te->Te.TyStOrUn.name)
+            ML_(dinfo_free)(te->Te.TyStOrUn.name);
+         if (te->Te.TyStOrUn.fieldRs)
+            VG_(deleteXA)(te->Te.TyStOrUn.fieldRs);
+         break;
+      case Te_TyEnum:
+         if (te->Te.TyEnum.name)
+            ML_(dinfo_free)(te->Te.TyEnum.name);
+         if (te->Te.TyEnum.atomRs)
+            VG_(deleteXA)(te->Te.TyEnum.atomRs);
+         break;
+      case Te_TyArray:
+         if (te->Te.TyArray.boundRs)
+            VG_(deleteXA)(te->Te.TyArray.boundRs);
+         break;
+      default:
+         vg_assert(0);
+   }
+
+   /* Wipe the whole entry, then restore .cuOff and mark it empty. */
+   VG_(memset)(te, 0, sizeof(*te));
+   te->cuOff = keptOff;
+   te->tag   = Te_EMPTY;
+}
+
+
+/* How big is this type?  If .b in the returned struct is False, the
+   size is unknown. */
+
 static MaybeUWord mk_MaybeUWord_Nothing ( void ) {
    MaybeUWord muw;
    muw.w = 0;
@@ -377,56 +623,71 @@
    return muw1;
 }
 
-/* How big is this type?  (post-resolved only) */
-/* FIXME: check all pointers before dereferencing */
-MaybeUWord ML_(sizeOfType)( Type* ty )
+MaybeUWord ML_(sizeOfType)( XArray* /* of TyEnt */ tyents,
+                            UWord cuOff )
 {
    Word       i;
    MaybeUWord eszB;
-   vg_assert(ty);
-   switch (ty->tag) {
-      case Ty_Base:
-         vg_assert(ty->Ty.Base.szB > 0);
-         return mk_MaybeUWord_Just( ty->Ty.Base.szB );
-      case Ty_Qual:
-         return ML_(sizeOfType)( ty->Ty.Qual.typeR );
-      case Ty_TyDef:
-         if (!ty->Ty.TyDef.typeR)
+   TyEnt*     ent = ML_(TyEnts__index_by_cuOff)(tyents, NULL, cuOff);
+   TyEnt*     ent2;
+   vg_assert(ent);   /* the lookup by cuOff must have found an entry */
+   vg_assert(ML_(TyEnt__is_type)(ent));
+   switch (ent->tag) {
+      case Te_TyBase:
+         vg_assert(ent->Te.TyBase.szB > 0);
+         return mk_MaybeUWord_Just( ent->Te.TyBase.szB );
+      case Te_TyQual:   /* same size as the type it refers to */
+         return ML_(sizeOfType)( tyents, ent->Te.TyQual.typeR );
+      case Te_TyTyDef:  /* as Te_TyQual, unless the target is Te_UNKNOWN */
+         ent2 = ML_(TyEnts__index_by_cuOff)(tyents, NULL,
+                                            ent->Te.TyTyDef.typeR);
+         vg_assert(ent2);
+         if (ent2->tag == Te_UNKNOWN)
             return mk_MaybeUWord_Nothing(); /*UNKNOWN*/
-         return ML_(sizeOfType)( ty->Ty.TyDef.typeR );
-      case Ty_PorR:
-         vg_assert(ty->Ty.PorR.szB == 4 || ty->Ty.PorR.szB == 8);
-         return mk_MaybeUWord_Just( ty->Ty.PorR.szB );
-      case Ty_StOrUn:
-         return ty->Ty.StOrUn.complete
-                   ? mk_MaybeUWord_Just( ty->Ty.StOrUn.szB )
+         return ML_(sizeOfType)( tyents, ent->Te.TyTyDef.typeR );
+      case Te_TyPorR:
+         vg_assert(ent->Te.TyPorR.szB == 4 || ent->Te.TyPorR.szB == 8);
+         return mk_MaybeUWord_Just( ent->Te.TyPorR.szB );
+      case Te_TyStOrUn: /* .szB is reported only when .complete is set */
+         return ent->Te.TyStOrUn.complete
+                   ? mk_MaybeUWord_Just( ent->Te.TyStOrUn.szB )
                    : mk_MaybeUWord_Nothing();
-      case Ty_Enum:
-         return mk_MaybeUWord_Just( ty->Ty.Enum.szB );
-      case Ty_Array:
-         if (!ty->Ty.Array.typeR)
+      case Te_TyEnum:
+         return mk_MaybeUWord_Just( ent->Te.TyEnum.szB );
+      case Te_TyArray:  /* element size times the extent of every bound */
+         ent2 = ML_(TyEnts__index_by_cuOff)(tyents, NULL,
+                                            ent->Te.TyArray.typeR);
+         vg_assert(ent2);
+         if (ent2->tag == Te_UNKNOWN)
             return mk_MaybeUWord_Nothing(); /*UNKNOWN*/
-         eszB = ML_(sizeOfType)( ty->Ty.Array.typeR );
-         for (i = 0; i < VG_(sizeXA)( ty->Ty.Array.bounds ); i++) {
-            TyBounds* bo
-               = *(TyBounds**)VG_(indexXA)(ty->Ty.Array.bounds, i);
+         eszB = ML_(sizeOfType)( tyents, ent->Te.TyArray.typeR );
+         for (i = 0; i < VG_(sizeXA)( ent->Te.TyArray.boundRs ); i++) {
+            UWord bo_cuOff
+               = *(UWord*)VG_(indexXA)(ent->Te.TyArray.boundRs, i);
+            TyEnt* bo
+              = ML_(TyEnts__index_by_cuOff)( tyents, NULL, bo_cuOff );
             vg_assert(bo);
-            if (!(bo->knownL && bo->knownU))
+            vg_assert(bo->tag == Te_Bound);  /* must refer to a bound entry */
+            if (!(bo->Te.Bound.knownL && bo->Te.Bound.knownU))
                return mk_MaybeUWord_Nothing(); /*UNKNOWN*/
             eszB = mul_MaybeUWord( 
                       eszB,
-                      mk_MaybeUWord_Just( bo->boundU - bo->boundL + 1 ));
+                      mk_MaybeUWord_Just( bo->Te.Bound.boundU 
+                                          - bo->Te.Bound.boundL + 1 ));
          }
          return eszB;
       default:
          VG_(printf)("ML_(sizeOfType): unhandled: ");
-         ML_(pp_Type)(ty);
+         ML_(pp_TyEnt)(ent);
          VG_(printf)("\n");
          vg_assert(0);
    }
 }
 
 
+/* Describe where in the type 'offset' falls.  Caller must
+   deallocate the resulting XArray. */
+
 static void copy_UWord_into_XA ( XArray* /* of UChar */ xa,
                                  UWord uw ) {
    UChar buf[32];
@@ -435,42 +696,52 @@
    VG_(addBytesToXA)( xa, buf, VG_(strlen)(buf));
 }
 
-/* Describe where in the type 'offset' falls.  Caller must
-   deallocate the resulting XArray. */
 XArray* /*UChar*/ ML_(describe_type)( /*OUT*/OffT* residual_offset,
-                                      Type* ty, OffT offset )
+                                      XArray* /* of TyEnt */ tyents,
+                                      UWord ty_cuOff, 
+                                      OffT offset )
 {
-   XArray* xa = VG_(newXA)( ML_(dinfo_zalloc), ML_(dinfo_free),
+   TyEnt*  ty;
+   XArray* xa = VG_(newXA)( ML_(dinfo_zalloc), "di.tytypes.dt.1",
+                            ML_(dinfo_free),
                             sizeof(UChar) );
    vg_assert(xa);
 
+   ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL, ty_cuOff);
+
    while (True) {
       vg_assert(ty);
+      vg_assert(ML_(TyEnt__is_type)(ty));
 
       switch (ty->tag) {
 
          /* These are all atomic types; there is nothing useful we can
             do. */
-         case Ty_Enum:
-         case Ty_Fn:
-         case Ty_Void:
-         case Ty_PorR:
-         case Ty_Base:
+         case Te_TyEnum:
+         case Te_TyFn:
+         case Te_TyVoid:
+         case Te_TyPorR:
+         case Te_TyBase:
             goto done;
 
-         case Ty_StOrUn: {
+         case Te_TyStOrUn: {
             Word       i;
             GXResult   res;
             MaybeUWord muw;
-            TyField    *field = NULL, *fields;
+            XArray*    fieldRs;
+            UWord      fieldR;
+            TyEnt*     field = NULL;
             OffT       offMin = 0, offMax1 = 0;
-            if (!ty->Ty.StOrUn.isStruct) goto done;
-            fields = ty->Ty.StOrUn.fields;
-            if ((!fields) || VG_(sizeXA)(fields) == 0) goto done;
-            for (i = 0; i < VG_(sizeXA)( fields ); i++ ) {
-               field = *(TyField**)VG_(indexXA)( fields, i );
+            if (!ty->Te.TyStOrUn.isStruct) goto done;
+            fieldRs = ty->Te.TyStOrUn.fieldRs;
+            if ((!fieldRs) || VG_(sizeXA)(fieldRs) == 0) goto done;
+            for (i = 0; i < VG_(sizeXA)( fieldRs ); i++ ) {
+               fieldR = *(UWord*)VG_(indexXA)( fieldRs, i );
+               field = ML_(TyEnts__index_by_cuOff)(tyents, NULL, fieldR);
                vg_assert(field);
-               vg_assert(field->loc);
+               vg_assert(field->tag == Te_Field);
+               vg_assert(field->Te.Field.loc);
+               vg_assert(field->Te.Field.nLoc > 0);
                /* Re data_bias in this call, we should really send in
                   a legitimate value.  But the expression is expected
                   to be a constant expression, evaluation of which
@@ -479,7 +750,7 @@
                   to this point (if, indeed, it has any meaning; from
                   which DebugInfo would we take the data bias? */
                res = ML_(evaluate_Dwarf3_Expr)(
-                       field->loc->bytes, field->loc->nbytes,
+                       field->Te.Field.loc, field->Te.Field.nLoc,
                        NULL/*fbGX*/, NULL/*RegSummary*/,
                        0/*data_bias*/,
                        True/*push_initial_zero*/);
@@ -490,7 +761,7 @@
                }
                if (res.kind != GXR_Value)
                   continue;
-               muw = ML_(sizeOfType)( field->typeR );
+               muw = ML_(sizeOfType)( tyents, field->Te.Field.typeR );
                if (muw.b != True)
                   goto done; /* size of field is unknown (?!) */
                offMin  = res.word;
@@ -502,39 +773,51 @@
                   break;
             }
             /* Did we find a suitable field? */
-            vg_assert(i >= 0 && i <= VG_(sizeXA)( fields ));
-            if (i == VG_(sizeXA)( fields ))
+            vg_assert(i >= 0 && i <= VG_(sizeXA)( fieldRs ));
+            if (i == VG_(sizeXA)( fieldRs ))
                goto done; /* No.  Give up. */
             /* Yes.  'field' is it. */
-            if (!field->name) goto done;
+            vg_assert(field);
+            if (!field->Te.Field.name) goto done;
             VG_(addBytesToXA)( xa, ".", 1 );
-            VG_(addBytesToXA)( xa, field->name,
-                               VG_(strlen)(field->name) );
+            VG_(addBytesToXA)( xa, field->Te.Field.name,
+                               VG_(strlen)(field->Te.Field.name) );
             offset -= offMin;
-            ty = field->typeR;
-            if (!ty) goto done;
+            ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL,
+                                             field->Te.Field.typeR );
+            tl_assert(ty);
+            if (ty->tag == Te_UNKNOWN) goto done;
             /* keep going; look inside the field. */
             break;
          }
 
-         case Ty_Array: {
+         case Te_TyArray: {
             MaybeUWord muw;
-            TyBounds*  bounds;
             UWord      size, eszB, ix;
+            UWord      boundR;
+            TyEnt*     elemTy;
+            TyEnt*     bound;
             /* Just deal with the simple, common C-case: 1-D array,
                zero based, known size. */
-            if (!(ty->Ty.Array.typeR && ty->Ty.Array.bounds))
+            elemTy = ML_(TyEnts__index_by_cuOff)(tyents, NULL, 
+                                                 ty->Te.TyArray.typeR);
+            vg_assert(elemTy);
+            if (elemTy->tag == Te_UNKNOWN) goto done;
+            vg_assert(ML_(TyEnt__is_type)(elemTy));
+            if (!ty->Te.TyArray.boundRs)
                goto done;
-            if (VG_(sizeXA)( ty->Ty.Array.bounds ) != 1) goto done;
-            bounds = *(TyBounds**)VG_(indexXA)( ty->Ty.Array.bounds, 0 );
-            vg_assert(bounds);
-            vg_assert(bounds->magic == TyBounds_MAGIC);
-            if (!(bounds->knownL && bounds->knownU && bounds->boundL == 0
-                  && bounds->boundU >= bounds->boundL))
+            if (VG_(sizeXA)( ty->Te.TyArray.boundRs ) != 1) goto done;
+            boundR = *(UWord*)VG_(indexXA)( ty->Te.TyArray.boundRs, 0 );
+            bound = ML_(TyEnts__index_by_cuOff)(tyents, NULL, boundR);
+            vg_assert(bound);
+            vg_assert(bound->tag == Te_Bound);
+            if (!(bound->Te.Bound.knownL && bound->Te.Bound.knownU
+                  && bound->Te.Bound.boundL == 0
+                  && bound->Te.Bound.boundU >= bound->Te.Bound.boundL))
                goto done;
-            size = bounds->boundU - bounds->boundL + 1;
+            size = bound->Te.Bound.boundU - bound->Te.Bound.boundL + 1;
             vg_assert(size >= 1);
-            muw = ML_(sizeOfType)( ty->Ty.Array.typeR );
+            muw = ML_(sizeOfType)( tyents, ty->Te.TyArray.typeR );
             if (muw.b != True)
                goto done; /* size of element type not known */
             eszB = muw.w;
@@ -543,27 +826,31 @@
             VG_(addBytesToXA)( xa, "[", 1 );
             copy_UWord_into_XA( xa, ix );
             VG_(addBytesToXA)( xa, "]", 1 );
-            ty = ty->Ty.Array.typeR;
+            ty = elemTy;
             offset -= ix * eszB;
             /* keep going; look inside the array element. */
             break;
          }
 
-         case Ty_Qual: {
-            if (!ty->Ty.Qual.typeR) goto done;
-            ty = ty->Ty.Qual.typeR;
+         case Te_TyQual: {
+            ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL,
+                                             ty->Te.TyQual.typeR);
+            tl_assert(ty);
+            if (ty->tag == Te_UNKNOWN) goto done;
             break;
          }
 
-         case Ty_TyDef: {
-            if (!ty->Ty.TyDef.typeR) goto done;
-            ty = ty->Ty.TyDef.typeR;
+         case Te_TyTyDef: {
+            ty = ML_(TyEnts__index_by_cuOff)(tyents, NULL,
+                                             ty->Te.TyTyDef.typeR);
+            tl_assert(ty);
+            if (ty->tag == Te_UNKNOWN) goto done;
             break;
          }
 
          default: {
             VG_(printf)("ML_(describe_type): unhandled: ");
-            ML_(pp_Type)(ty);
+            ML_(pp_TyEnt)(ty);
             VG_(printf)("\n");
             vg_assert(0);
          }
diff --git a/coregrind/m_demangle/cp-demangle.c b/coregrind/m_demangle/cp-demangle.c
index f9ed189..6d3e1d5 100644
--- a/coregrind/m_demangle/cp-demangle.c
+++ b/coregrind/m_demangle/cp-demangle.c
@@ -51,9 +51,9 @@
 #ifndef STANDALONE
 #define size_t       Int
 
-#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
-#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(_cc,s)    VG_(arena_malloc) (VG_AR_DEMANGLE, _cc, s)
+#define free(p)          VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(_cc,p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, _cc, p, s)
 #endif
 
 /* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation,
@@ -423,7 +423,8 @@
 string_list_new (length)
      int length;
 {
-  string_list_t s = (string_list_t) malloc (sizeof (struct string_list_def));
+  string_list_t s = (string_list_t) malloc ("demangle.sln.1",
+                                            sizeof (struct string_list_def));
   if (s == NULL)
     return NULL;
   s->caret_position = 0;
@@ -594,7 +595,7 @@
 	sizeof (struct substitution_def) * dm->substitutions_allocated;
 
       dm->substitutions = (struct substitution_def *)
-	realloc (dm->substitutions, new_array_size);
+	realloc ("demangle.sa.1", dm->substitutions, new_array_size);
       if (dm->substitutions == NULL)
 	/* Realloc failed.  */
 	{
@@ -672,7 +673,8 @@
 template_arg_list_new ()
 {
   template_arg_list_t new_list =
-    (template_arg_list_t) malloc (sizeof (struct template_arg_list_def));
+    (template_arg_list_t) malloc ("demangle.talt.1",
+                                  sizeof (struct template_arg_list_def));
   if (new_list == NULL)
     return NULL;
   /* Initialize the new list to have no arguments.  */
@@ -820,7 +822,8 @@
      int style;
 {
   demangling_t dm;
-  dm = (demangling_t) malloc (sizeof (struct demangling_def));
+  dm = (demangling_t) malloc ("demangle.dn.1",
+                              sizeof (struct demangling_def));
   if (dm == NULL)
     return NULL;
 
@@ -834,7 +837,8 @@
   if (dm->last_source_name == NULL)
     return NULL;
   dm->substitutions = (struct substitution_def *)
-    malloc (dm->substitutions_allocated * sizeof (struct substitution_def));
+    malloc ("demangle.dn.2",
+            dm->substitutions_allocated * sizeof (struct substitution_def));
   if (dm->substitutions == NULL)
     {
       dyn_string_delete (dm->last_source_name);
diff --git a/coregrind/m_demangle/cplus-dem.c b/coregrind/m_demangle/cplus-dem.c
index 2644cd7..06c03b4 100644
--- a/coregrind/m_demangle/cplus-dem.c
+++ b/coregrind/m_demangle/cplus-dem.c
@@ -76,10 +76,10 @@
 #ifndef STANDALONE
 #define size_t  Int
 
-#define xstrdup(ptr)        VG_(arena_strdup) (VG_AR_DEMANGLE, ptr)
-#define free(ptr)           VG_(arena_free)   (VG_AR_DEMANGLE, ptr)
-#define xmalloc(size)       VG_(arena_malloc) (VG_AR_DEMANGLE, size)
-#define xrealloc(ptr, size) VG_(arena_realloc)(VG_AR_DEMANGLE, ptr, size)
+#define xstrdup(_cc,ptr)        VG_(arena_strdup) (VG_AR_DEMANGLE, _cc, ptr)
+#define free(ptr)               VG_(arena_free)   (VG_AR_DEMANGLE, ptr)
+#define xmalloc(_cc,size)       VG_(arena_malloc) (VG_AR_DEMANGLE, _cc, size)
+#define xrealloc(_cc,ptr, size) VG_(arena_realloc)(VG_AR_DEMANGLE, _cc, ptr, size)
 
 #define abort() vg_assert(0)
 #undef strstr
@@ -948,7 +948,7 @@
   struct work_stuff work[1];
 
   if (current_demangling_style == no_demangling)
-    return xstrdup (mangled);
+    return xstrdup ("demangle.cd.1", mangled);
 
   memset ((char *) work, 0, sizeof (work));
   work->options = options;
@@ -995,7 +995,7 @@
       *size *= 2;
       if (*size < min_size)
 	*size = min_size;
-      *old_vect = xrealloc (*old_vect, *size * element_size);
+      *old_vect = xrealloc ("demangle.gv.1", *old_vect, *size * element_size);
     }
 }
 
@@ -1219,55 +1219,60 @@
   /* Deep-copy dynamic storage.  */
   if (from->typevec_size)
     to->typevec
-      = (char **) xmalloc (from->typevec_size * sizeof (to->typevec[0]));
+      = (char **) xmalloc ("demangle.wsctf.1",
+                           from->typevec_size * sizeof (to->typevec[0]));
 
   for (i = 0; i < from->ntypes; i++)
     {
       int len = strlen (from->typevec[i]) + 1;
 
-      to->typevec[i] = xmalloc (len);
+      to->typevec[i] = xmalloc ("demangle.wsctf.2", len);
       memcpy (to->typevec[i], from->typevec[i], len);
     }
 
   if (from->ksize)
     to->ktypevec
-      = (char **) xmalloc (from->ksize * sizeof (to->ktypevec[0]));
+      = (char **) xmalloc ("demangle.wsctf.3",
+                           from->ksize * sizeof (to->ktypevec[0]));
 
   for (i = 0; i < from->numk; i++)
     {
       int len = strlen (from->ktypevec[i]) + 1;
 
-      to->ktypevec[i] = xmalloc (len);
+      to->ktypevec[i] = xmalloc ("demangle.wsctf.4", len);
       memcpy (to->ktypevec[i], from->ktypevec[i], len);
     }
 
   if (from->bsize)
     to->btypevec
-      = (char **) xmalloc (from->bsize * sizeof (to->btypevec[0]));
+      = (char **) xmalloc ("demangle.wsctf.5",
+                           from->bsize * sizeof (to->btypevec[0]));
 
   for (i = 0; i < from->numb; i++)
     {
       int len = strlen (from->btypevec[i]) + 1;
 
-      to->btypevec[i] = xmalloc (len);
+      to->btypevec[i] = xmalloc ("demangle.wsctf.6", len);
       memcpy (to->btypevec[i], from->btypevec[i], len);
     }
 
   if (from->ntmpl_args)
     to->tmpl_argvec
-      = xmalloc (from->ntmpl_args * sizeof (to->tmpl_argvec[0]));
+      = xmalloc ("demangle.wsctf.7",
+                 from->ntmpl_args * sizeof (to->tmpl_argvec[0]));
 
   for (i = 0; i < from->ntmpl_args; i++)
     {
       int len = strlen (from->tmpl_argvec[i]) + 1;
 
-      to->tmpl_argvec[i] = xmalloc (len);
+      to->tmpl_argvec[i] = xmalloc ("demangle.wsctf.8", len);
       memcpy (to->tmpl_argvec[i], from->tmpl_argvec[i], len);
     }
 
   if (from->previous_argument)
     {
-      to->previous_argument = (string*) xmalloc (sizeof (string));
+      to->previous_argument = (string*) xmalloc ("demangle.wsctf.9",
+                                                 sizeof (string));
       string_init (to->previous_argument);
       string_appends (to->previous_argument, from->previous_argument);
     }
@@ -2018,7 +2023,7 @@
 	    string_appendn (s, "0", 1);
 	  else
 	    {
-	      char *p = xmalloc (symbol_len + 1), *q;
+	      char *p = xmalloc ("demangle.dtvp.1", symbol_len + 1), *q;
 	      strncpy (p, *mangled, symbol_len);
 	      p [symbol_len] = '\0';
 	      /* We use cplus_demangle here, rather than
@@ -2133,7 +2138,8 @@
   if (!is_type)
     {
       /* Create an array for saving the template argument values. */
-      work->tmpl_argvec = (char**) xmalloc (r * sizeof (char *));
+      work->tmpl_argvec = (char**) xmalloc ("demangle.dt.1",
+                                            r * sizeof (char *));
       work->ntmpl_args = r;
       for (i = 0; i < r; i++)
 	work->tmpl_argvec[i] = 0;
@@ -2158,7 +2164,7 @@
 		{
 		  /* Save the template argument. */
 		  int len = temp.p - temp.b;
-		  work->tmpl_argvec[i] = xmalloc (len + 1);
+		  work->tmpl_argvec[i] = xmalloc ("demangle.dt.2", len + 1);
 		  memcpy (work->tmpl_argvec[i], temp.b, len);
 		  work->tmpl_argvec[i][len] = '\0';
 		}
@@ -2186,7 +2192,7 @@
 		{
 		  /* Save the template argument. */
 		  int len = r2;
-		  work->tmpl_argvec[i] = xmalloc (len + 1);
+		  work->tmpl_argvec[i] = xmalloc ("demangle.dt.3", len + 1);
 		  memcpy (work->tmpl_argvec[i], *mangled, len);
 		  work->tmpl_argvec[i][len] = '\0';
 		}
@@ -2232,7 +2238,7 @@
 	  if (!is_type)
 	    {
 	      int len = s->p - s->b;
-	      work->tmpl_argvec[i] = xmalloc (len + 1);
+	      work->tmpl_argvec[i] = xmalloc ("demangle.dt.4", len + 1);
 	      memcpy (work->tmpl_argvec[i], s->b, len);
 	      work->tmpl_argvec[i][len] = '\0';
 
@@ -3131,7 +3137,7 @@
   char * recurse = (char *)NULL;
   char * recurse_dem = (char *)NULL;
 
-  recurse = (char *) xmalloc (namelength + 1);
+  recurse = (char *) xmalloc ("demangle.rd.1", namelength + 1);
   memcpy (recurse, *mangled, namelength);
   recurse[namelength] = '\000';
 
@@ -4130,7 +4136,7 @@
   string_append (result, "&");
 
   /* Now recursively demangle the literal name */
-  recurse = (char *) xmalloc (literal_len + 1);
+  recurse = (char *) xmalloc ("demangle.dhtl.1", literal_len + 1);
   memcpy (recurse, *mangled, literal_len);
   recurse[literal_len] = '\000';
 
@@ -4240,7 +4246,8 @@
     string_clear (work->previous_argument);
   else
     {
-      work->previous_argument = (string*) xmalloc (sizeof (string));
+      work->previous_argument = (string*) xmalloc ("demangle.da.1",
+                                                   sizeof (string));
       string_init (work->previous_argument);
     }
 
@@ -4275,17 +4282,18 @@
 	{
 	  work -> typevec_size = 3;
 	  work -> typevec
-	    = (char **) xmalloc (sizeof (char *) * work -> typevec_size);
+	    = (char **) xmalloc ("demangle.rt.1",
+                                 sizeof (char *) * work -> typevec_size);
 	}
       else
 	{
 	  work -> typevec_size *= 2;
 	  work -> typevec
-	    = (char **) xrealloc ((char *)work -> typevec,
+	    = (char **) xrealloc ("demangle.rt.2", (char *)work -> typevec,
 				  sizeof (char *) * work -> typevec_size);
 	}
     }
-  tem = xmalloc (len + 1);
+  tem = xmalloc ("demangle.rt.3", len + 1);
   memcpy (tem, start, len);
   tem[len] = '\0';
   work -> typevec[work -> ntypes++] = tem;
@@ -4307,17 +4315,18 @@
 	{
 	  work -> ksize = 5;
 	  work -> ktypevec
-	    = (char **) xmalloc (sizeof (char *) * work -> ksize);
+	    = (char **) xmalloc ("demangle.rK.1",
+                                 sizeof (char *) * work -> ksize);
 	}
       else
 	{
 	  work -> ksize *= 2;
 	  work -> ktypevec
-	    = (char **) xrealloc ((char *)work -> ktypevec,
+	    = (char **) xrealloc ("demangle.rK.2", (char *)work -> ktypevec,
 				  sizeof (char *) * work -> ksize);
 	}
     }
-  tem = xmalloc (len + 1);
+  tem = xmalloc ("demangle.rK.3", len + 1);
   memcpy (tem, start, len);
   tem[len] = '\0';
   work -> ktypevec[work -> numk++] = tem;
@@ -4339,13 +4348,14 @@
 	{
 	  work -> bsize = 5;
 	  work -> btypevec
-	    = (char **) xmalloc (sizeof (char *) * work -> bsize);
+	    = (char **) xmalloc ("demangle.rB.1", 
+                                 sizeof (char *) * work -> bsize);
 	}
       else
 	{
 	  work -> bsize *= 2;
 	  work -> btypevec
-	    = (char **) xrealloc ((char *)work -> btypevec,
+	    = (char **) xrealloc ("demangle.rB.2", (char *)work -> btypevec,
 				  sizeof (char *) * work -> bsize);
 	}
     }
@@ -4364,7 +4374,7 @@
 {
   char *tem;
 
-  tem = xmalloc (len + 1);
+  tem = xmalloc ("demangle.remember_Btype.1", len + 1);
   memcpy (tem, start, len);
   tem[len] = '\0';
   work -> btypevec[ind] = tem;
@@ -4815,7 +4825,7 @@
 	{
 	  n = 32;
 	}
-      s->p = s->b = xmalloc (n);
+      s->p = s->b = xmalloc ("demangle.sn.1", n);
       s->e = s->b + n;
     }
   else if (s->e - s->p < n)
@@ -4823,7 +4833,7 @@
       tem = s->p - s->b;
       n += tem;
       n *= 2;
-      s->b = xrealloc (s->b, n);
+      s->b = xrealloc ("demangle.sn.2", s->b, n);
       s->p = s->b + tem;
       s->e = s->b + n;
     }
diff --git a/coregrind/m_demangle/dyn-string.c b/coregrind/m_demangle/dyn-string.c
index 1c0466b..1dcbba8 100644
--- a/coregrind/m_demangle/dyn-string.c
+++ b/coregrind/m_demangle/dyn-string.c
@@ -39,9 +39,9 @@
 #include "dyn-string.h"
 
 #ifndef STANDALONE
-#define malloc(s)    VG_(arena_malloc) (VG_AR_DEMANGLE, s)
-#define free(p)      VG_(arena_free)   (VG_AR_DEMANGLE, p)
-#define realloc(p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, p, s)
+#define malloc(_cc,s)    VG_(arena_malloc) (VG_AR_DEMANGLE, _cc, s)
+#define free(p)          VG_(arena_free)   (VG_AR_DEMANGLE, p)
+#define realloc(_cc,p,s) VG_(arena_realloc)(VG_AR_DEMANGLE, _cc, p, s)
 #endif
 
 /* If this file is being compiled for inclusion in the C++ runtime
@@ -77,7 +77,7 @@
   if (ds_struct_ptr->s == NULL)
     return 0;
 #else
-  ds_struct_ptr->s = (char *) malloc (space);
+  ds_struct_ptr->s = (char *) malloc ("demangle.dsi.1", space);
 #endif
   ds_struct_ptr->allocated = space;
   ds_struct_ptr->length = 0;
@@ -98,7 +98,7 @@
 {
   dyn_string_t result;
 #ifdef RETURN_ON_ALLOCATION_FAILURE
-  result = (dyn_string_t) malloc (sizeof (struct dyn_string));
+  result = (dyn_string_t) malloc ("demangle.dsn.1", sizeof (struct dyn_string));
   if (result == NULL)
     return NULL;
   if (!dyn_string_init (result, space))
@@ -107,7 +107,7 @@
       return NULL;
     }
 #else
-  result = (dyn_string_t) malloc (sizeof (struct dyn_string));
+  result = (dyn_string_t) malloc ("demangle.dsn.2", sizeof (struct dyn_string));
   dyn_string_init (result, space);
 #endif
   return result;
@@ -167,14 +167,14 @@
       ds->allocated = new_allocated;
       /* We actually need more space.  */
 #ifdef RETURN_ON_ALLOCATION_FAILURE
-      ds->s = (char *) realloc (ds->s, ds->allocated);
+      ds->s = (char *) realloc ("demangle.dsr.1", ds->s, ds->allocated);
       if (ds->s == NULL)
 	{
 	  free (ds);
 	  return NULL;
 	}
 #else
-      ds->s = (char *) realloc (ds->s, ds->allocated);
+      ds->s = (char *) realloc ("demangle.dsr.2", ds->s, ds->allocated);
 #endif
     }
 
diff --git a/coregrind/m_errormgr.c b/coregrind/m_errormgr.c
index 475e008..0e0f31d 100644
--- a/coregrind/m_errormgr.c
+++ b/coregrind/m_errormgr.c
@@ -600,7 +600,7 @@
    */
 
    /* copy main part */
-   p = VG_(arena_malloc)(VG_AR_ERRORS, sizeof(Error));
+   p = VG_(arena_malloc)(VG_AR_ERRORS, "errormgr.mre.1", sizeof(Error));
    *p = err;
 
    /* update 'extra' */
@@ -618,7 +618,7 @@
 
    /* copy block pointed to by 'extra', if there is one */
    if (NULL != p->extra && 0 != extra_size) { 
-      void* new_extra = VG_(malloc)(extra_size);
+      void* new_extra = VG_(malloc)("errormgr.mre.2", extra_size);
       VG_(memcpy)(new_extra, p->extra, extra_size);
       p->extra = new_extra;
    }
@@ -979,7 +979,8 @@
    while (True) {
       /* Assign and initialise the two suppression halves (core and tool) */
       Supp* supp;
-      supp        = VG_(arena_malloc)(VG_AR_CORE, sizeof(Supp));
+      supp        = VG_(arena_malloc)(VG_AR_CORE, "errormgr.losf.1",
+                                      sizeof(Supp));
       supp->count = 0;
 
       // Initialise temporary reading-in buffer.
@@ -999,7 +1000,7 @@
 
       if (eof || VG_STREQ(buf, "}")) BOMB("unexpected '}'");
 
-      supp->sname = VG_(arena_strdup)(VG_AR_CORE, buf);
+      supp->sname = VG_(arena_strdup)(VG_AR_CORE, "errormgr.losf.2", buf);
 
       eof = VG_(get_line) ( fd, buf, N_BUF );
 
@@ -1069,7 +1070,8 @@
             BOMB("too many callers in stack trace");
          if (i > 0 && i >= VG_(clo_backtrace_size)) 
             break;
-         tmp_callers[i].name = VG_(arena_strdup)(VG_AR_CORE, buf);
+         tmp_callers[i].name = VG_(arena_strdup)(VG_AR_CORE,
+                                                 "errormgr.losf.3", buf);
          if (!setLocationTy(&(tmp_callers[i])))
             BOMB("location should start with 'fun:' or 'obj:'");
          i++;
@@ -1085,7 +1087,8 @@
 
       // Copy tmp_callers[] into supp->callers[]
       supp->n_callers = i;
-      supp->callers = VG_(arena_malloc)(VG_AR_CORE, i*sizeof(SuppLoc));
+      supp->callers = VG_(arena_malloc)(VG_AR_CORE, "errormgr.losf.4",
+                                        i*sizeof(SuppLoc));
       for (i = 0; i < supp->n_callers; i++) {
          supp->callers[i] = tmp_callers[i];
       }
diff --git a/coregrind/m_execontext.c b/coregrind/m_execontext.c
index dde94ff..08de8f0 100644
--- a/coregrind/m_execontext.c
+++ b/coregrind/m_execontext.c
@@ -135,7 +135,7 @@
 
    ec_htab_size_idx = 0;
    ec_htab_size = ec_primes[ec_htab_size_idx];
-   ec_htab = VG_(arena_malloc)(VG_AR_EXECTXT, 
+   ec_htab = VG_(arena_malloc)(VG_AR_EXECTXT, "execontext.iEs1",
                                sizeof(ExeContext*) * ec_htab_size);
    for (i = 0; i < ec_htab_size; i++)
       ec_htab[i] = NULL;
@@ -260,7 +260,7 @@
       return; /* out of primes - can't resize further */
 
    new_size = ec_primes[ec_htab_size_idx + 1];
-   new_ec_htab = VG_(arena_malloc)(VG_AR_EXECTXT,
+   new_ec_htab = VG_(arena_malloc)(VG_AR_EXECTXT, "execontext.reh1",
                                    sizeof(ExeContext*) * new_size);
 
    VG_(debugLog)(
@@ -395,7 +395,7 @@
    /* Bummer.  We have to allocate a new context record. */
    ec_totstored++;
 
-   new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, 
+   new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, "execontext.rEw2.2",
                                sizeof(struct _ExeContext) 
                                + n_ips * sizeof(Addr) );
 
diff --git a/coregrind/m_hashtable.c b/coregrind/m_hashtable.c
index e28a4be..dd2020b 100644
--- a/coregrind/m_hashtable.c
+++ b/coregrind/m_hashtable.c
@@ -69,8 +69,9 @@
    /* Initialises to zero, ie. all entries NULL */
    SizeT       n_chains = primes[0];
    SizeT       sz       = n_chains * sizeof(VgHashNode*);
-   VgHashTable table    = VG_(calloc)(1, sizeof(struct _VgHashTable));
-   table->chains        = VG_(calloc)(1, sz);
+   VgHashTable table    = VG_(calloc)("hashtable.Hc.1",
+                                      1, sizeof(struct _VgHashTable));
+   table->chains        = VG_(calloc)("hashtable.Hc.2", 1, sz);
    table->n_chains      = n_chains;
    table->n_elements    = 0;
    table->iterOK        = True;
@@ -119,7 +120,7 @@
 
    table->n_chains = new_chains;
    sz = new_chains * sizeof(VgHashNode*);
-   chains = VG_(calloc)(1, sz);
+   chains = VG_(calloc)("hashtable.resize.1", 1, sz);
 
    for (i = 0; i < old_chains; i++) {
       node = table->chains[i];
@@ -209,7 +210,7 @@
    if (*n_elems == 0)
       return NULL;
 
-   arr = VG_(malloc)( *n_elems * sizeof(VgHashNode*) );
+   arr = VG_(malloc)( "hashtable.Hta.1", *n_elems * sizeof(VgHashNode*) );
 
    j = 0;
    for (i = 0; i < table->n_chains; i++) {
diff --git a/coregrind/m_initimg/initimg-aix5.c b/coregrind/m_initimg/initimg-aix5.c
index 0a7b6b3..0d62e62 100644
--- a/coregrind/m_initimg/initimg-aix5.c
+++ b/coregrind/m_initimg/initimg-aix5.c
@@ -115,7 +115,7 @@
                 + VG_(strlen)( _so )
                 + 1 /*NUL*/;
    vg_assert(pltool_len > 0);
-   pltool_str = VG_(malloc)( pltool_len );
+   pltool_str = VG_(malloc)( "initimg-aix5.ici.1", pltool_len );
    pltool_str[0] = 0;
    VG_(strcat)( pltool_str, VG_(libdir) );
    VG_(strcat)( pltool_str, "/" );
@@ -134,7 +134,7 @@
                 + VG_(strlen)( vgpreload_core_so )
                 + 1 /*NUL*/;
    vg_assert(plcore_len > 0);
-   plcore_str = VG_(malloc)( plcore_len );
+   plcore_str = VG_(malloc)( "initimg-aix5.ici.2", plcore_len );
    plcore_str[0] = 0;
    VG_(strcat)( plcore_str, VG_(libdir) );
    VG_(strcat)( plcore_str, "/" );
@@ -151,7 +151,7 @@
    if (ld_pre_str && VG_(strlen)(ld_pre_str) > 0) {
       have_ld_pre = True;
       ld_pre_len  = VG_(strlen)(ld_pre_str) + 1/*NUL*/;
-      ld_pre_str = VG_(malloc)( ld_pre_len );
+      ld_pre_str = VG_(malloc)( "initimg-aix5.ici.3", ld_pre_len );
       ld_pre_str[0] = 0;
       VG_(strcat)( ld_pre_str, VG_(getenv)("LD_PRELOAD") );
       vg_assert( ld_pre_str[ld_pre_len-1] == 0);
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index a97bea9..d1cb5f3 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -178,9 +178,6 @@
    }
 
    VG_(memset)(info, 0, sizeof(*info));
-   info->exe_base = VG_(client_base);
-   info->exe_end  = VG_(client_end);
-
    ret = VG_(do_exec)(exe_name, info);
 
    // The client was successfully loaded!  Continue.
@@ -241,12 +238,13 @@
    Int preload_tool_path_len = vglib_len + VG_(strlen)(toolname) 
                                          + sizeof(VG_PLATFORM) + 16;
    Int preload_string_len    = preload_core_path_len + preload_tool_path_len;
-   HChar* preload_string     = VG_(malloc)(preload_string_len);
+   HChar* preload_string     = VG_(malloc)("initimg-linux.sce.1",
+                                           preload_string_len);
    vg_assert(preload_string);
 
    /* Determine if there's a vgpreload_<tool>.so file, and setup
       preload_string. */
-   preload_tool_path = VG_(malloc)(preload_tool_path_len);
+   preload_tool_path = VG_(malloc)("initimg-linux.sce.2", preload_tool_path_len);
    vg_assert(preload_tool_path);
    VG_(snprintf)(preload_tool_path, preload_tool_path_len,
                  "%s/%s/vgpreload_%s.so", VG_(libdir), VG_PLATFORM, toolname);
@@ -268,7 +266,8 @@
       envc++;
 
    /* Allocate a new space */
-   ret = VG_(malloc) (sizeof(HChar *) * (envc+1+1)); /* 1 new entry + NULL */
+   ret = VG_(malloc) ("initimg-linux.sce.3",
+                      sizeof(HChar *) * (envc+1+1)); /* 1 new entry + NULL */
    vg_assert(ret);
 
    /* copy it over */
@@ -282,7 +281,7 @@
    for (cpp = ret; cpp && *cpp; cpp++) {
       if (VG_(memcmp)(*cpp, ld_preload, ld_preload_len) == 0) {
          Int len = VG_(strlen)(*cpp) + preload_string_len;
-         HChar *cp = VG_(malloc)(len);
+         HChar *cp = VG_(malloc)("initimg-linux.sce.4", len);
          vg_assert(cp);
 
          VG_(snprintf)(cp, len, "%s%s:%s",
@@ -297,7 +296,7 @@
    /* Add the missing bits */
    if (!ld_preload_done) {
       Int len = ld_preload_len + preload_string_len;
-      HChar *cp = VG_(malloc) (len);
+      HChar *cp = VG_(malloc) ("initimg-linux.sce.5", len);
       vg_assert(cp);
 
       VG_(snprintf)(cp, len, "%s%s", ld_preload, preload_string);
diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c
index 05b0cf9..16d1d63 100644
--- a/coregrind/m_libcfile.c
+++ b/coregrind/m_libcfile.c
@@ -186,14 +186,23 @@
      return res;
    }
 #  elif defined(VGO_aix5)
-   res = VG_(do_syscall4)(__NR_AIX5_statx,
-                          (UWord)file_name,
-                          (UWord)buf,
-                          sizeof(struct vki_stat),
-                          VKI_STX_NORMAL);
-   if (!res.isError)
-      TRANSLATE_TO_vg_stat(vgbuf, &buf);
-   return res;
+   { struct vki_stat buf;
+     res = VG_(do_syscall4)(__NR_AIX5_statx,
+                            (UWord)file_name,
+                            (UWord)&buf,
+                            sizeof(struct vki_stat),
+                            VKI_STX_NORMAL);
+     if (!res.isError) {
+        VG_(memset)(vgbuf, 0, sizeof(*vgbuf));
+        vgbuf->st_dev  = (ULong)buf.st_dev;
+        vgbuf->st_ino  = (ULong)buf.st_ino;
+        vgbuf->st_mode = (UInt)buf.st_mode;
+        vgbuf->st_uid  = (UInt)buf.st_uid;
+        vgbuf->st_gid  = (UInt)buf.st_gid;
+        vgbuf->st_size = (Long)buf.st_size;
+     }
+     return res;
+   }
 #  else
 #    error Unknown OS
 #  endif
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c
index 9ab786f..4ea181a 100644
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -89,7 +89,8 @@
    Char **env = (*envp);
    Char **cpp;
    Int len = VG_(strlen)(varname);
-   Char *valstr = VG_(arena_malloc)(VG_AR_CORE, len + VG_(strlen)(val) + 2);
+   Char *valstr = VG_(arena_malloc)(VG_AR_CORE, "libcproc.es.1",
+                                    len + VG_(strlen)(val) + 2);
    Char **oldenv = NULL;
 
    VG_(sprintf)(valstr, "%s=%s", varname, val);
@@ -102,7 +103,7 @@
    }
 
    if (env == NULL) {
-      env = VG_(arena_malloc)(VG_AR_CORE, sizeof(Char **) * 2);
+      env = VG_(arena_malloc)(VG_AR_CORE, "libcproc.es.2", sizeof(Char **) * 2);
       env[0] = valstr;
       env[1] = NULL;
 
@@ -110,7 +111,8 @@
 
    }  else {
       Int envlen = (cpp-env) + 2;
-      Char **newenv = VG_(arena_malloc)(VG_AR_CORE, envlen * sizeof(Char **));
+      Char **newenv = VG_(arena_malloc)(VG_AR_CORE, "libcproc.es.3",
+                                        envlen * sizeof(Char **));
 
       for (cpp = newenv; *env; )
 	 *cpp++ = *env++;
@@ -203,7 +205,8 @@
          ld_library_path_str = &envp[i][16];
    }
 
-   buf = VG_(arena_malloc)(VG_AR_CORE, VG_(strlen)(VG_(libdir)) + 20);
+   buf = VG_(arena_malloc)(VG_AR_CORE, "libcproc.erves.1",
+                           VG_(strlen)(VG_(libdir)) + 20);
 
    // Remove Valgrind-specific entries from LD_*.
    VG_(sprintf)(buf, "%s*/vgpreload_*.so", VG_(libdir));
@@ -253,7 +256,8 @@
 
    envlen = oldenvp - oldenv + 1;
    
-   newenv = VG_(arena_malloc)(VG_AR_CORE, envlen * sizeof(Char **));
+   newenv = VG_(arena_malloc)(VG_AR_CORE, "libcproc.ec.1",
+                              envlen * sizeof(Char **));
 
    oldenvp = oldenv;
    newenvp = newenv;
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 480a28d..894aaef 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -78,6 +78,30 @@
    INSTR_PTR( VG_(threads)[tid].arch ) = ip;
 }
 
+void VG_(set_syscall_return_shadows) ( ThreadId tid,
+                                       /* shadow1/shadow2 vals for the result */
+                                       UWord s1res, UWord s2res,
+                                       /* shadow vals for the error val (used only on AIX5) */
+                                       UWord s1err, UWord s2err )
+{  /* Stores into VG_(threads)[tid].arch.vex_shadow1 / vex_shadow2. */
+#  if defined(VGP_x86_linux)
+   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
+   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
+#  elif defined(VGP_amd64_linux)
+   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
+   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
+#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
+   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
+   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
+#  elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
+   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
+   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
+   VG_(threads)[tid].arch.vex_shadow1.guest_GPR4 = s1err; /* only case that uses s1err */
+   VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err; /* only case that uses s2err */
+#  else
+#    error "Unknown plat"
+#  endif
+}
 
 void
 VG_(get_shadow_regs_area) ( ThreadId tid, 
@@ -86,16 +110,20 @@
 {
    void*        src;
    ThreadState* tst;
-   vg_assert(shadowNo == 1 || shadowNo == 2);
+   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
    vg_assert(VG_(is_valid_tid)(tid));
    // Bounds check
    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    vg_assert(offset + size <= sizeof(VexGuestArchState));
    // Copy
    tst = & VG_(threads)[tid];
-   src = shadowNo == 1
-            ? (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset)
-            : (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset);
+   src = NULL; /* stays NULL if shadowNo matches none of the cases */
+   switch (shadowNo) {
+      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
+      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
+      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
+   }
+   vg_assert(src != NULL); /* core code: use the core assertion macro */
    VG_(memcpy)( dst, src, size);
 }
 
@@ -106,16 +134,20 @@
 {
    void*        dst;
    ThreadState* tst;
-   vg_assert(shadowNo == 1 || shadowNo == 2);
+   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
    vg_assert(VG_(is_valid_tid)(tid));
    // Bounds check
    vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
    vg_assert(offset + size <= sizeof(VexGuestArchState));
    // Copy
    tst = & VG_(threads)[tid];
-   dst = shadowNo == 1
-            ? (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset)
-            : (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset);
+   dst = NULL; /* stays NULL if shadowNo matches none of the cases */
+   switch (shadowNo) {
+      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
+      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
+      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
+   }
+   vg_assert(dst != NULL); /* core code: use the core assertion macro */
    VG_(memcpy)( dst, src, size);
 }
 
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index a4bb850..a3c1a5c 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -170,6 +170,7 @@
 "    --debug-dump=frames       mimic /usr/bin/readelf --debug-dump=frames\n"
 "    --trace-redir=no|yes      show redirection details? [no]\n"
 "    --trace-sched=no|yes      show thread scheduler details? [no]\n"
+"    --profile-heap=no|yes     profile Valgrind's own space use\n"
 "    --wait-for-gdb=yes|no     pause on startup to wait for gdb attach\n"
 "    --sym-offsets=yes|no      show syms in form 'name+offset' ? [no]\n"
 "    --read-var-info=yes|no    read variable type & location info? [no]\n"
@@ -364,7 +365,7 @@
             //   wouldn't disappear on them.)
             if (0)
                VG_(printf)("tool-specific arg: %s\n", arg);
-            arg = VG_(strdup)(arg + toolname_len + 1);
+            arg = VG_(strdup)("main.mpclo.1", arg + toolname_len + 1);
             arg[0] = '-';
             arg[1] = '-';
 
@@ -419,7 +420,7 @@
       else VG_BOOL_CLO(arg, "--trace-redir",      VG_(clo_trace_redir))
 
       else VG_BOOL_CLO(arg, "--trace-syscalls",   VG_(clo_trace_syscalls))
-      else VG_BOOL_CLO(arg, "--trace-pthreads",   VG_(clo_trace_pthreads))
+      else VG_BOOL_CLO(arg, "--profile-heap",     VG_(clo_profile_heap))
       else VG_BOOL_CLO(arg, "--wait-for-gdb",     VG_(clo_wait_for_gdb))
       else VG_STR_CLO (arg, "--db-command",       VG_(clo_db_command))
       else VG_STR_CLO (arg, "--sim-hints",        VG_(clo_sim_hints))
@@ -595,6 +596,8 @@
       VG_(clo_track_fds) = False;
       /* Disable timestamped output */
       VG_(clo_time_stamp) = False;
+      /* Disable heap profiling, since that prints lots of stuff. */
+      VG_(clo_profile_heap) = False;
       /* Also, we want to set options for the leak checker, but that
          will have to be done in Memcheck's flag-handling code, not
          here. */
@@ -710,7 +713,7 @@
          the default one. */
       static const Char default_supp[] = "default.supp";
       Int len = VG_(strlen)(VG_(libdir)) + 1 + sizeof(default_supp);
-      Char *buf = VG_(arena_malloc)(VG_AR_CORE, len);
+      Char *buf = VG_(arena_malloc)(VG_AR_CORE, "main.mpclo.2", len);
       VG_(sprintf)(buf, "%s/%s", VG_(libdir), default_supp);
       VG_(clo_suppressions)[VG_(clo_n_suppressions)] = buf;
       VG_(clo_n_suppressions)++;
@@ -1125,6 +1128,11 @@
 static IIFinaliseImageInfo the_iifii;
 
 
+/* A simple pair: 'a' is a segment/code start address and 'ull' the debuginfo
+   handle recorded for it; used when calling VG_TRACK(new_mem_startup, ...). */
+typedef  struct { Addr a; ULong ull; }  Addr_n_ULong;
+
+
 /* --- Forwards decls to do with shutdown --- */
 
 static void final_tidyup(ThreadId tid); 
@@ -1151,7 +1159,7 @@
 
    n_starts = 1;
    while (True) {
-      starts = VG_(malloc)( n_starts * sizeof(Addr) );
+      starts = VG_(malloc)( "main.gss.1", n_starts * sizeof(Addr) );
       if (starts == NULL)
          break;
       r = VG_(am_get_segment_starts)( starts, n_starts );
@@ -1185,6 +1193,7 @@
    Int     loglevel, i;
    Bool    logging_to_fd;
    struct vki_rlimit zero = { 0, 0 };
+   XArray* addr2dihandle = NULL;
 
    //============================================================
    //
@@ -1330,7 +1339,7 @@
    //   free pair right now to check that nothing is broken.
    //--------------------------------------------------------------
    VG_(debugLog)(1, "main", "Starting the dynamic memory manager\n");
-   { void* p = VG_(malloc)( 12345 );
+   { void* p = VG_(malloc)( "main.vm.1", 12345 );
      if (p) VG_(free)( p );
    }
    VG_(debugLog)(1, "main", "Dynamic memory manager is running\n");
@@ -1711,11 +1720,29 @@
    //   p: setup_code_redirect_table [so that redirs can be recorded]
    //   p: mallocfree
    //   p: probably: setup fds and process CLOs, so that logging works
+   //
+   // While doing this, make a note of the debuginfo-handles that
+   // come back from VG_(di_notify_mmap)/VG_(di_aix5_notify_segchange).
+   // Later, in "Tell the tool about the initial client memory permissions"
+   // (just below) we can then hand these handles off to the tool in
+   // calls to VG_TRACK(new_mem_startup, ...).  This gives the tool the
+   // opportunity to make further queries to m_debuginfo before the
+   // client is started, if it wants.  We put this information into an
+   // XArray, each handle along with the associated segment start address,
+   // and search the XArray for the handles later, when calling
+   // VG_TRACK(new_mem_startup, ...).
    //--------------------------------------------------------------
    VG_(debugLog)(1, "main", "Load initial debug info\n");
+
+   vg_assert(!addr2dihandle); /* must not have been created yet */
+   addr2dihandle = VG_(newXA)( VG_(malloc), "main.vm.2",
+                               VG_(free), sizeof(Addr_n_ULong) );
+   vg_assert(addr2dihandle);
+
 #  if defined(VGO_linux)
    { Addr* seg_starts;
      Int   n_seg_starts;
+     Addr_n_ULong anu;
 
      seg_starts = get_seg_starts( &n_seg_starts );
      vg_assert(seg_starts && n_seg_starts >= 0);
@@ -1723,19 +1750,27 @@
      /* show them all to the debug info reader.  allow_SkFileV has to
         be True here so that we read info from the valgrind executable
         itself. */
-     for (i = 0; i < n_seg_starts; i++)
-        VG_(di_notify_mmap)( seg_starts[i], True/*allow_SkFileV*/ );
+     for (i = 0; i < n_seg_starts; i++) {
+        anu.ull = VG_(di_notify_mmap)( seg_starts[i], True/*allow_SkFileV*/ );
+        /* anu.ull holds the debuginfo handle returned by di_notify_mmap,
+           if any. */
+        if (anu.ull > 0) {
+           anu.a = seg_starts[i];
+           VG_(addToXA)( addr2dihandle, &anu );
+        }
+     }
 
      VG_(free)( seg_starts );
    }
 #  elif defined(VGO_aix5)
    { AixCodeSegChange* changes;
      Int changes_size, changes_used;
+     Addr_n_ULong anu;
 
      /* Find out how many AixCodeSegChange records we will need,
 	and acquire them. */
      changes_size = VG_(am_aix5_reread_procmap_howmany_directives)(); 
-     changes = VG_(malloc)(changes_size * sizeof(AixCodeSegChange));
+     changes = VG_(malloc)("main.vm.3", changes_size * sizeof(AixCodeSegChange));
      vg_assert(changes);
 
      /* Now re-read /proc/<pid>/map and acquire a change set */
@@ -1743,17 +1778,23 @@
      vg_assert(changes_used >= 0 && changes_used <= changes_size);
 
      /* And notify m_debuginfo of the changes. */
-     for (i = 0; i < changes_used; i++)
-        VG_(di_aix5_notify_segchange)(
-           changes[i].code_start,
-           changes[i].code_len,
-           changes[i].data_start,
-           changes[i].data_len,
-           changes[i].file_name,
-           changes[i].mem_name,
-           changes[i].is_mainexe,
-           changes[i].acquire
-        );
+     for (i = 0; i < changes_used; i++) {
+        anu.ull = VG_(di_aix5_notify_segchange)(
+                     changes[i].code_start,
+                     changes[i].code_len,
+                     changes[i].data_start,
+                     changes[i].data_len,
+                     changes[i].file_name,
+                     changes[i].mem_name,
+                     changes[i].is_mainexe,
+                     changes[i].acquire
+                  );
+        if (anu.ull > 0) { /* a nonzero handle came back: record it */
+           vg_assert(changes[i].acquire);
+           anu.a = changes[i].code_start; /* is this correct? */
+           VG_(addToXA)( addr2dihandle, &anu );
+        }
+     }
 
      VG_(free)(changes);
    }
@@ -1797,11 +1838,18 @@
    //   p: mallocfree
    //   p: setup_client_stack
    //   p: setup_client_dataseg
+   //
+   // For each segment we tell the client about, look up in 
+   // addr2dihandle as created above, to see if there's a debuginfo
+   // handle associated with the segment, that we can hand along
+   // to the tool, to be helpful.
    //--------------------------------------------------------------
    VG_(debugLog)(1, "main", "Tell tool about initial permissions\n");
    { Addr*     seg_starts;
      Int       n_seg_starts;
 
+     vg_assert(addr2dihandle); /* built while loading initial debug info */
+
      /* Mark the main thread as running while we tell the tool about
         the client memory so that the tool can associate that memory
         with the main thread. */
@@ -1813,9 +1861,11 @@
 
      /* show interesting ones to the tool */
      for (i = 0; i < n_seg_starts; i++) {
+        Word j, n;
         NSegment const* seg 
            = VG_(am_find_nsegment)( seg_starts[i] );
         vg_assert(seg);
+        vg_assert(seg->start == seg_starts[i] );
         if (seg->kind == SkFileC || seg->kind == SkAnonC) {
            VG_(debugLog)(2, "main", 
                             "tell tool about %010lx-%010lx %c%c%c\n",
@@ -1823,12 +1873,28 @@
                              seg->hasR ? 'r' : '-',
                              seg->hasW ? 'w' : '-',
                              seg->hasX ? 'x' : '-' );
+           /* search addr2dihandle to see if we have an entry
+              matching seg->start.  On exit, j == n means no match. */
+           n = VG_(sizeXA)( addr2dihandle );
+           for (j = 0; j < n; j++) {
+              Addr_n_ULong* anl = VG_(indexXA)( addr2dihandle, j );
+              if (anl->a == seg->start) {
+                 vg_assert(anl->ull > 0); /* check it's a valid handle */
+                 break;
+              }
+           }
+           vg_assert(j >= 0 && j <= n);
            VG_TRACK( new_mem_startup, seg->start, seg->end+1-seg->start, 
-                                      seg->hasR, seg->hasW, seg->hasX );
+                     seg->hasR, seg->hasW, seg->hasX,
+                     /* and the retrieved debuginfo handle, if any */
+                     j < n
+                     ? ((Addr_n_ULong*)VG_(indexXA)( addr2dihandle, j ))->ull
+                        : 0 );
         }
      }
 
      VG_(free)( seg_starts );
+     VG_(deleteXA)( addr2dihandle );
 
      /* Also do the initial stack permissions. */
      { NSegment const* seg 
@@ -1865,7 +1931,8 @@
                   - (Addr)&VG_(trampoline_stuff_start),
                False, /* readable? */
                False, /* writable? */
-               True   /* executable? */ );
+               True   /* executable? */,
+               0 /* di_handle: no associated debug info */ );
 
      /* Clear the running thread indicator */
      VG_(running_tid) = VG_INVALID_THREADID;
@@ -2076,6 +2143,14 @@
    if (VG_(clo_verbosity) > 1)
       print_all_stats();
 
+   /* Show a profile of the heap(s) at shutdown.  Optionally, first
+      throw away all the debug info, as that makes it easy to spot
+      leaks in the debuginfo reader. */
+   if (VG_(clo_profile_heap)) {
+      if (0) VG_(di_discard_ALL_debuginfo)();
+      VG_(print_arena_cc_analysis)();
+   }
+
    if (VG_(clo_profile_flags) > 0) {
       #define N_MAX 200
       BBProfEntry tops[N_MAX];
diff --git a/coregrind/m_mallocfree.c b/coregrind/m_mallocfree.c
index 66f64c5..9ed35a2 100644
--- a/coregrind/m_mallocfree.c
+++ b/coregrind/m_mallocfree.c
@@ -51,6 +51,8 @@
 Long VG_(free_queue_volume) = 0;
 Long VG_(free_queue_length) = 0;
 
+static void cc_analyse_alloc_arena ( ArenaId aid ); /* fwds */
+
 /*------------------------------------------------------------*/
 /*--- Main types                                           ---*/
 /*------------------------------------------------------------*/
@@ -68,6 +70,7 @@
 
 /* Layout of an in-use block:
 
+      cost center              (sizeof(ULong) bytes)
       this block total szB     (sizeof(SizeT) bytes)
       red zone bytes           (depends on Arena.rz_szB, but >= sizeof(void*))
       (payload bytes)
@@ -76,6 +79,7 @@
 
    Layout of a block on the free list:
 
+      cost center              (sizeof(ULong) bytes)
       this block total szB     (sizeof(SizeT) bytes)
       freelist previous ptr    (sizeof(void*) bytes)
       excess red zone bytes    (if Arena.rz_szB > sizeof(void*))
@@ -87,13 +91,13 @@
    Total size in bytes (bszB) and payload size in bytes (pszB)
    are related by:
 
-      bszB == pszB + 2*sizeof(SizeT) + 2*a->rz_szB
+      bszB == pszB + 2*sizeof(SizeT) + 2*a->rz_szB + sizeof(ULong)
 
    The minimum overhead per heap block for arenas used by
    the core is:   
 
-      32-bit platforms:  2*4 + 2*4 == 16 bytes
-      64-bit platforms:  2*8 + 2*8 == 32 bytes
+      32-bit platforms:  2*4 + 2*4 + 8 == 24 bytes
+      64-bit platforms:  2*8 + 2*8 + 8 == 40 bytes
 
    In both cases extra overhead may be incurred when rounding the payload
    size up to VG_MIN_MALLOC_SZB.
@@ -111,6 +115,13 @@
    - Superblock admin section lengths (due to elastic padding)
    - Block admin section (low and high) lengths (due to elastic redzones)
    - Block payload lengths (due to req_pszB rounding up)
+
+   The heap-profile cost-center field is 8 bytes even on 32 bit
+   platforms.  This is so as to keep the payload field 8-aligned.  On
+   a 64-bit platform, this cc-field contains a pointer to a const
+   HChar*, which is the cost center name.  On 32-bit platforms, the
+   pointer lives in the lower-addressed half of the field, regardless
+   of the endianness of the host.
 */
 typedef
    struct {
@@ -169,6 +180,7 @@
       SizeT        bytes_on_loan;
       SizeT        bytes_mmaped;
       SizeT        bytes_on_loan_max;
+      SizeT        next_profile_at;
    }
    Arena;
 
@@ -206,7 +218,7 @@
 SizeT get_bszB_as_is ( Block* b )
 {
    UByte* b2     = (UByte*)b;
-   SizeT bszB_lo = *(SizeT*)&b2[0];
+   SizeT bszB_lo = *(SizeT*)&b2[0 + sizeof(ULong)];
    SizeT bszB_hi = *(SizeT*)&b2[mk_plain_bszB(bszB_lo) - sizeof(SizeT)];
    vg_assert2(bszB_lo == bszB_hi, 
       "Heap block lo/hi size mismatch: lo = %llu, hi = %llu.\n"
@@ -227,7 +239,7 @@
 void set_bszB ( Block* b, SizeT bszB )
 {
    UByte* b2 = (UByte*)b;
-   *(SizeT*)&b2[0]                                   = bszB;
+   *(SizeT*)&b2[0 + sizeof(ULong)]                   = bszB;
    *(SizeT*)&b2[mk_plain_bszB(bszB) - sizeof(SizeT)] = bszB;
 }
 
@@ -249,7 +261,7 @@
 static __inline__
 SizeT overhead_szB_lo ( Arena* a )
 {
-   return sizeof(SizeT) + a->rz_szB;
+   return sizeof(ULong) + sizeof(SizeT) + a->rz_szB;
 }
 static __inline__
 SizeT overhead_szB_hi ( Arena* a )
@@ -319,7 +331,7 @@
 void set_prev_b ( Block* b, Block* prev_p )
 { 
    UByte* b2 = (UByte*)b;
-   *(Block**)&b2[sizeof(SizeT)] = prev_p;
+   *(Block**)&b2[sizeof(ULong) + sizeof(SizeT)] = prev_p;
 }
 static __inline__
 void set_next_b ( Block* b, Block* next_p )
@@ -331,7 +343,7 @@
 Block* get_prev_b ( Block* b )
 { 
    UByte* b2 = (UByte*)b;
-   return *(Block**)&b2[sizeof(SizeT)];
+   return *(Block**)&b2[sizeof(ULong) + sizeof(SizeT)];
 }
 static __inline__
 Block* get_next_b ( Block* b )
@@ -342,6 +354,22 @@
 
 //---------------------------------------------------------------------------
 
+// Set and get the cost-center field of a block.
+static __inline__
+void set_cc ( Block* b, HChar* cc )
+{
+   /* The cost-center pointer is stored at the very start of the block. */
+   *(HChar**)(UByte*)b = cc;
+}
+static __inline__
+HChar* get_cc ( Block* b )
+{
+   /* The cost-center pointer is stored at the very start of the block. */
+   return *(HChar**)(UByte*)b;
+}
+
+//---------------------------------------------------------------------------
+
 // Get the block immediately preceding this one in the Superblock.
 static __inline__
 Block* get_predecessor_block ( Block* b )
@@ -358,7 +386,7 @@
 void set_rz_lo_byte ( Arena* a, Block* b, UInt rz_byteno, UByte v )
 {
    UByte* b2 = (UByte*)b;
-   b2[sizeof(SizeT) + rz_byteno] = v;
+   b2[sizeof(ULong) + sizeof(SizeT) + rz_byteno] = v;
 }
 static __inline__
 void set_rz_hi_byte ( Arena* a, Block* b, UInt rz_byteno, UByte v )
@@ -370,7 +398,7 @@
 UByte get_rz_lo_byte ( Arena* a, Block* b, UInt rz_byteno )
 {
    UByte* b2 = (UByte*)b;
-   return b2[sizeof(SizeT) + rz_byteno];
+   return b2[sizeof(ULong) + sizeof(SizeT) + rz_byteno];
 }
 static __inline__
 UByte get_rz_hi_byte ( Arena* a, Block* b, UInt rz_byteno )
@@ -420,7 +448,9 @@
    // redzone size if necessary to achieve this.
    a->rz_szB = rz_szB;
    while (0 != overhead_szB_lo(a) % VG_MIN_MALLOC_SZB) a->rz_szB++;
-   vg_assert(overhead_szB_lo(a) == overhead_szB_hi(a));
+   //   vg_assert(overhead_szB_lo(a) == overhead_szB_hi(a));
+   vg_assert(0 == overhead_szB_lo(a) % VG_MIN_MALLOC_SZB);
+   vg_assert(0 == overhead_szB_hi(a) % VG_MIN_MALLOC_SZB);
 
    a->min_sblock_szB = min_sblock_szB;
    for (i = 0; i < N_MALLOC_LISTS; i++) a->freelist[i] = NULL;
@@ -431,6 +461,7 @@
    a->bytes_on_loan     = 0;
    a->bytes_mmaped      = 0;
    a->bytes_on_loan_max = 0;
+   a->next_profile_at   = 25 * 1000 * 1000;
    vg_assert(sizeof(a->sblocks_initial) 
              == SBLOCKS_SIZE_INITIAL * sizeof(Superblock*));
 }
@@ -448,6 +479,16 @@
    }
 }
 
+/* Run the cost-center analysis on each of the VG_N_ARENAS arenas in turn.
+   Asserts that VG_(clo_profile_heap) is set. */
+void VG_(print_arena_cc_analysis) ( void )
+{
+   UInt aid;
+   vg_assert( VG_(clo_profile_heap) );
+   for (aid = 0; aid < VG_N_ARENAS; aid++) { cc_analyse_alloc_arena(aid); }
+}
+
+
 /* This library is self-initialising, as it makes this more self-contained,
    less coupled with the outside world.  Hence VG_(arena_malloc)() and
    VG_(arena_free)() below always call ensure_mm_init() to ensure things are
@@ -804,7 +845,7 @@
    }
    if (p_best < a->freelist[lno]) {
 #     ifdef VERBOSE_MALLOC
-      VG_(printf)("retreat by %d\n", a->freelist[lno] - p_best);
+      VG_(printf)("retreat by %ld\n", (Word)(a->freelist[lno] - p_best));
 #     endif
       a->freelist[lno] = p_best;
    }
@@ -930,8 +971,8 @@
 
    if (arena_bytes_on_loan != a->bytes_on_loan) {
 #     ifdef VERBOSE_MALLOC
-      VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %d, "
-                   "arena_bytes_on_loan %d: "
+      VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %ld, "
+                   "arena_bytes_on_loan %ld: "
                    "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan);
 #     endif
       ppSuperblocks(a);
@@ -991,6 +1032,110 @@
 }
 
 
+#define N_AN_CCS 1000
+/* Per-cost-center totals: bytes, block count, and the cost-center name. */
+typedef struct { ULong nBytes; ULong nBlocks; HChar* cc; } AnCC;
+/* Table of totals filled in by cc_analyse_alloc_arena; N_AN_CCS slots. */
+static AnCC anCCs[N_AN_CCS];
+
+/* Three-way comparison of two AnCC records on their nBytes field. */
+static Int cmp_AnCC_by_vol ( void* v1, void* v2 ) {
+   ULong vol1 = ((AnCC*)v1)->nBytes;
+   ULong vol2 = ((AnCC*)v2)->nBytes;
+   if (vol1 == vol2) return 0;
+   return vol1 < vol2 ? -1 : 1;
+}
+
+/* Print a heap profile for arena 'aid': per cost center, the total payload
+   bytes and number of in-use blocks, in the order produced by VG_(ssort)
+   with cmp_AnCC_by_vol.  Does nothing if the arena's name is NULL. */
+static void cc_analyse_alloc_arena ( ArenaId aid )
+{
+   Word        i, j, k;
+   Arena*      a;
+   Block*      b;
+   Bool        thisFree, lastWasFree;
+   SizeT       b_bszB;
+   HChar*      cc;
+   UInt        n_ccs = 0;
+
+   a = arenaId_to_ArenaP(aid);
+   if (a->name == NULL) {
+      /* arena is not in use, is not initialised and will fail the
+         sanity check that follows. */
+      return;
+   }
+   sanity_check_malloc_arena(aid);
+
+   VG_(printf)(
+      "-------- Arena \"%s\": %lu mmap'd, %lu/%lu max/curr --------\n",
+      a->name, a->bytes_mmaped, a->bytes_on_loan_max, a->bytes_on_loan
+   );
+
+   for (j = 0; j < a->sblocks_used; ++j) {
+      Superblock * sb = a->sblocks[j];
+      lastWasFree = False;
+      for (i = 0; i < sb->n_payload_bytes; i += mk_plain_bszB(b_bszB)) {
+         b     = (Block*)&sb->payload_bytes[i];
+         b_bszB = get_bszB_as_is(b);
+         if (!blockSane(a, b)) {
+            VG_(printf)("cc_analyse_alloc_arena: sb %p, block %ld "
+                        "(bszB %lu):  BAD\n", sb, i, b_bszB );
+            vg_assert(0);
+         }
+         thisFree = !is_inuse_block(b);
+         if (thisFree && lastWasFree) {
+            VG_(printf)("cc_analyse_alloc_arena: sb %p, block %ld "
+                        "(bszB %lu): UNMERGED FREES\n", sb, i, b_bszB );
+            vg_assert(0);
+         }
+         lastWasFree = thisFree;
+         /* Only in-use blocks are charged to a cost center. */
+         if (thisFree) continue;
+
+         if (0)
+         VG_(printf)("block: inUse=%d pszB=%d cc=%s\n", 
+                     (Int)(!thisFree), (Int)bszB_to_pszB(a, b_bszB),
+                     get_cc(b));
+         cc = get_cc(b);
+         vg_assert(cc);
+         for (k = 0; k < n_ccs; k++) {
+            vg_assert(anCCs[k].cc);
+            if (0 == VG_(strcmp)(cc, anCCs[k].cc))
+               break;
+         }
+         vg_assert(k >= 0 && k <= n_ccs);
+
+         if (k == n_ccs) {
+            /* Name not seen before: start a new entry at the end. */
+            vg_assert(n_ccs < N_AN_CCS-1);
+            n_ccs++;
+            anCCs[k].nBytes  = 0;
+            anCCs[k].nBlocks = 0;
+            anCCs[k].cc      = cc;
+         }
+         vg_assert(k >= 0 && k < n_ccs && k < N_AN_CCS);
+         anCCs[k].nBytes += (ULong)bszB_to_pszB(a, b_bszB);
+         anCCs[k].nBlocks++;
+      }
+      if (i > sb->n_payload_bytes) {
+         VG_(printf)( "cc_analyse_alloc_arena: sb %p: last block "
+                      "overshoots end\n", sb);
+         vg_assert(0);
+      }
+   }
+
+   VG_(ssort)( &anCCs[0], n_ccs, sizeof(anCCs[0]), cmp_AnCC_by_vol );
+
+   for (k = 0; k < n_ccs; k++) {
+      VG_(printf)("%'13llu in %'9llu: %s\n",
+                  anCCs[k].nBytes, anCCs[k].nBlocks, anCCs[k].cc );
+   }
+
+   VG_(printf)("\n");
+}
+
+
 void VG_(sanity_check_malloc_all) ( void )
 {
    UInt i;
@@ -1092,7 +1237,7 @@
    return ((req_pszB + n) & (~n));
 }
 
-void* VG_(arena_malloc) ( ArenaId aid, SizeT req_pszB )
+void* VG_(arena_malloc) ( ArenaId aid, HChar* cc, SizeT req_pszB )
 {
    SizeT       req_bszB, frag_bszB, b_bszB;
    UInt        lno, i;
@@ -1108,6 +1253,10 @@
    req_pszB = align_req_pszB(req_pszB);
    req_bszB = pszB_to_bszB(a, req_pszB);
 
+   // You must provide a cost-center name against which to charge
+   // this allocation; it isn't optional.
+   vg_assert(cc);
+
    // Scan through all the big-enough freelists for a block.
    //
    // Nb: this scanning might be expensive in some cases.  Eg. if you
@@ -1185,6 +1334,7 @@
    b = (Block*)&new_sb->payload_bytes[0];
    lno = pszB_to_listNo(bszB_to_pszB(a, new_sb->n_payload_bytes));
    mkFreeBlock ( a, b, new_sb->n_payload_bytes, lno);
+   set_cc(b, "admin.free-new-sb-1");
    // fall through
 
   obtained_block:
@@ -1205,19 +1355,31 @@
       // printf( "split %dB into %dB and %dB\n", b_bszB, req_bszB, frag_bszB );
       unlinkBlock(a, b, lno);
       mkInuseBlock(a, b, req_bszB);
+      set_cc(b, cc);
       mkFreeBlock(a, &b[req_bszB], frag_bszB, 
                      pszB_to_listNo(bszB_to_pszB(a, frag_bszB)));
+      set_cc(&b[req_bszB], "admin.fragmentation-1");
       b_bszB = get_bszB(b);
    } else {
       // No, mark as in use and use as-is.
       unlinkBlock(a, b, lno);
       mkInuseBlock(a, b, b_bszB);
+      set_cc(b, cc);
    }
 
    // Update stats
    a->bytes_on_loan += bszB_to_pszB(a, b_bszB);
-   if (a->bytes_on_loan > a->bytes_on_loan_max)
+   if (a->bytes_on_loan > a->bytes_on_loan_max) {
       a->bytes_on_loan_max = a->bytes_on_loan;
+      if (a->bytes_on_loan_max >= a->next_profile_at) {
+         /* next profile after 10% more growth */
+         a->next_profile_at 
+            = (SizeT)( 
+                 (((ULong)a->bytes_on_loan_max) * 110ULL) / 100ULL );
+         if (VG_(clo_profile_heap))
+            cc_analyse_alloc_arena(aid);
+      }
+   }
 
 #  ifdef DEBUG_MALLOC
    sanity_check_malloc_arena(aid);
@@ -1286,6 +1448,7 @@
    // Put this chunk back on a list somewhere.
    b_listno = pszB_to_listNo(b_pszB);
    mkFreeBlock( a, b, b_bszB, b_listno );
+   set_cc(b, "admin.free-1");
 
    // See if this block can be merged with its successor.
    // First test if we're far enough before the superblock's end to possibly
@@ -1304,6 +1467,7 @@
          b_bszB += other_bszB;
          b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
          mkFreeBlock( a, b, b_bszB, b_listno );
+         set_cc(b, "admin.free-2");
       }
    } else {
       // Not enough space for successor: check that b is the last block
@@ -1326,6 +1490,7 @@
          b_bszB += other_bszB;
          b_listno = pszB_to_listNo(bszB_to_pszB(a, b_bszB));
          mkFreeBlock( a, b, b_bszB, b_listno );
+         set_cc(b, "admin.free-3");
       }
    } else {
       // Not enough space for predecessor: check that b is the first block,
@@ -1373,7 +1538,8 @@
    .    .               .   .   .               .   .
 
 */
-void* VG_(arena_memalign) ( ArenaId aid, SizeT req_alignB, SizeT req_pszB )
+void* VG_(arena_memalign) ( ArenaId aid, HChar* cc, 
+                            SizeT req_alignB, SizeT req_pszB )
 {
    SizeT  base_pszB_req, base_pszB_act, frag_bszB;
    Block  *base_b, *align_b;
@@ -1386,6 +1552,10 @@
 
    vg_assert(req_pszB < MAX_PSZB);
 
+   // You must provide a cost-center name against which to charge
+   // this allocation; it isn't optional.
+   vg_assert(cc);
+
    // Check that the requested alignment seems reasonable; that is, is
    // a power of 2.
    if (req_alignB < VG_MIN_MALLOC_SZB
@@ -1408,7 +1578,7 @@
    /* Payload ptr for the block we are going to split.  Note this
       changes a->bytes_on_loan; we save and restore it ourselves. */
    saved_bytes_on_loan = a->bytes_on_loan;
-   base_p = VG_(arena_malloc) ( aid, base_pszB_req );
+   base_p = VG_(arena_malloc) ( aid, cc, base_pszB_req );
    a->bytes_on_loan = saved_bytes_on_loan;
 
    /* Give up if we couldn't allocate enough space */
@@ -1437,11 +1607,13 @@
    /* Create the fragment block, and put it back on the relevant free list. */
    mkFreeBlock ( a, base_b, frag_bszB,
                  pszB_to_listNo(bszB_to_pszB(a, frag_bszB)) );
+   set_cc(base_b, "admin.frag-memalign-1");
 
    /* Create the aligned block. */
    mkInuseBlock ( a, align_b,
                   base_p + base_pszB_act 
                          + overhead_szB_hi(a) - (UByte*)align_b );
+   set_cc(align_b, cc);
 
    /* Final sanity checks. */
    vg_assert( is_inuse_block(get_payload_block(a, align_p)) );
@@ -1538,7 +1710,8 @@
 /*--- Services layered on top of malloc/free.              ---*/
 /*------------------------------------------------------------*/
 
-void* VG_(arena_calloc) ( ArenaId aid, SizeT nmemb, SizeT bytes_per_memb )
+void* VG_(arena_calloc) ( ArenaId aid, HChar* cc,
+                          SizeT nmemb, SizeT bytes_per_memb )
 {
    SizeT  size;
    UChar* p;
@@ -1546,7 +1719,7 @@
    size = nmemb * bytes_per_memb;
    vg_assert(size >= nmemb && size >= bytes_per_memb);// check against overflow
 
-   p = VG_(arena_malloc) ( aid, size );
+   p = VG_(arena_malloc) ( aid, cc, size );
 
    VG_(memset)(p, 0, size);
 
@@ -1556,7 +1729,8 @@
 }
 
 
-void* VG_(arena_realloc) ( ArenaId aid, void* ptr, SizeT req_pszB )
+void* VG_(arena_realloc) ( ArenaId aid, HChar* cc, 
+                           void* ptr, SizeT req_pszB )
 {
    Arena* a;
    SizeT  old_pszB;
@@ -1578,7 +1752,7 @@
       return ptr;
    }
 
-   p_new = VG_(arena_malloc) ( aid, req_pszB );
+   p_new = VG_(arena_malloc) ( aid, cc, req_pszB );
       
    VG_(memcpy)(p_new, ptr, old_pszB);
 
@@ -1589,7 +1763,8 @@
 
 
 /* Inline just for the wrapper VG_(strdup) below */
-__inline__ Char* VG_(arena_strdup) ( ArenaId aid, const Char* s )
+__inline__ Char* VG_(arena_strdup) ( ArenaId aid, HChar* cc, 
+                                     const Char* s )
 {
    Int   i;
    Int   len;
@@ -1599,7 +1774,7 @@
       return NULL;
 
    len = VG_(strlen)(s) + 1;
-   res = VG_(arena_malloc) (aid, len);
+   res = VG_(arena_malloc) (aid, cc, len);
 
    for (i = 0; i < len; i++)
       res[i] = s[i];
@@ -1613,9 +1788,9 @@
 
 // All just wrappers to avoid exposing arenas to tools.
 
-void* VG_(malloc) ( SizeT nbytes )
+void* VG_(malloc) ( HChar* cc, SizeT nbytes )
 {
-   return VG_(arena_malloc) ( VG_AR_TOOL, nbytes );
+   return VG_(arena_malloc) ( VG_AR_TOOL, cc, nbytes );
 }
 
 void  VG_(free) ( void* ptr )
@@ -1623,19 +1798,19 @@
    VG_(arena_free) ( VG_AR_TOOL, ptr );
 }
 
-void* VG_(calloc) ( SizeT nmemb, SizeT bytes_per_memb )
+void* VG_(calloc) ( HChar* cc, SizeT nmemb, SizeT bytes_per_memb )
 {
-   return VG_(arena_calloc) ( VG_AR_TOOL, nmemb, bytes_per_memb );
+   return VG_(arena_calloc) ( VG_AR_TOOL, cc, nmemb, bytes_per_memb );
 }
 
-void* VG_(realloc) ( void* ptr, SizeT size )
+void* VG_(realloc) ( HChar* cc, void* ptr, SizeT size )
 {
-   return VG_(arena_realloc) ( VG_AR_TOOL, ptr, size );
+   return VG_(arena_realloc) ( VG_AR_TOOL, cc, ptr, size );
 }
 
-Char* VG_(strdup) ( const Char* s )
+Char* VG_(strdup) ( HChar* cc, const Char* s )
 {
-   return VG_(arena_strdup) ( VG_AR_TOOL, s ); 
+   return VG_(arena_strdup) ( VG_AR_TOOL, cc, s ); 
 }
 
 // Useful for querying user blocks.           
diff --git a/coregrind/m_options.c b/coregrind/m_options.c
index af47dbf..5c2df63 100644
--- a/coregrind/m_options.c
+++ b/coregrind/m_options.c
@@ -75,7 +75,7 @@
 Bool   VG_(clo_debug_dump_frames) = False;
 Bool   VG_(clo_trace_redir)    = False;
 Bool   VG_(clo_trace_sched)    = False;
-Bool   VG_(clo_trace_pthreads) = False;
+Bool   VG_(clo_profile_heap)   = False;
 Int    VG_(clo_dump_error)     = 0;
 Int    VG_(clo_backtrace_size) = 12;
 Char*  VG_(clo_sim_hints)      = NULL;
@@ -153,7 +153,7 @@
 
    // The 10 is slop, it should be enough in most cases.
    len = j + VG_(strlen)(format) + 10;
-   out = VG_(malloc)( len );
+   out = VG_(malloc)( "options.efn.1", len );
    if (format[0] != '/') {
       VG_(strcpy)(out, base_dir);
       out[j++] = '/';
@@ -162,7 +162,7 @@
 #define ENSURE_THIS_MUCH_SPACE(x) \
    if (j + x >= len) { \
       len += (10 + x); \
-      out = VG_(realloc)(out, len); \
+      out = VG_(realloc)("options.efn.2(multiple)", out, len); \
    }
 
    while (format[i]) {
@@ -240,7 +240,8 @@
 
   bad: {
    Char* opt =    // 2:  1 for the '=', 1 for the NUL.
-      VG_(malloc)( VG_(strlen)(option_name) + VG_(strlen)(format) + 2 );
+      VG_(malloc)( "options.efn.3",
+                   VG_(strlen)(option_name) + VG_(strlen)(format) + 2 );
    VG_(strcpy)(opt, option_name);
    VG_(strcat)(opt, "=");
    VG_(strcat)(opt, format);
diff --git a/coregrind/m_oset.c b/coregrind/m_oset.c
index d176398..980d054 100644
--- a/coregrind/m_oset.c
+++ b/coregrind/m_oset.c
@@ -112,6 +112,7 @@
    SizeT       keyOff;     // key offset
    OSetCmp_t   cmp;        // compare a key and an element, or NULL
    OSetAlloc_t alloc;      // allocator
+   HChar* cc;              // cc for allocator
    OSetFree_t  free;       // deallocator
    Word        nElems;     // number of elements in the tree
    AvlNode*    root;       // root node
@@ -282,7 +283,8 @@
 
 // The underscores avoid GCC complaints about overshadowing global names.
 AvlTree* VG_(OSetGen_Create)(OffT _keyOff, OSetCmp_t _cmp,
-                             OSetAlloc_t _alloc, OSetFree_t _free)
+                             OSetAlloc_t _alloc, HChar* _cc,
+                             OSetFree_t _free)
 {
    AvlTree* t;
 
@@ -294,10 +296,11 @@
    vg_assert(_free);
    if (!_cmp) vg_assert(0 == _keyOff);    // If no cmp, offset must be zero
 
-   t           = _alloc(sizeof(AvlTree));
+   t           = _alloc(_cc, sizeof(AvlTree));
    t->keyOff   = _keyOff;
    t->cmp      = _cmp;
    t->alloc    = _alloc;
+   t->cc       = _cc;
    t->free     = _free;
    t->nElems   = 0;
    t->root     = NULL;
@@ -306,9 +309,10 @@
    return t;
 }
 
-AvlTree* VG_(OSetWord_Create)(OSetAlloc_t _alloc, OSetFree_t _free)
+AvlTree* VG_(OSetWord_Create)(OSetAlloc_t _alloc, HChar* _cc, 
+                              OSetFree_t _free)
 {
-   return VG_(OSetGen_Create)(/*keyOff*/0, /*cmp*/NULL, _alloc, _free);
+   return VG_(OSetGen_Create)(/*keyOff*/0, /*cmp*/NULL, _alloc, _cc, _free);
 }
 
 // Destructor, frees up all memory held by remaining nodes.
@@ -356,7 +360,7 @@
 void* VG_(OSetGen_AllocNode)(AvlTree* t, SizeT elemSize)
 {
    Int nodeSize = sizeof(AvlNode) + elemSize;
-   AvlNode* n   = t->alloc( nodeSize );
+   AvlNode* n   = t->alloc( t->cc, nodeSize );
    vg_assert(elemSize > 0);
    VG_(memset)(n, 0, nodeSize);
    n->magic = OSET_MAGIC;
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index cd16a56..3b10272 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -280,9 +280,9 @@
 
 static void maybe_add_active ( Active /*by value; callee copies*/ );
 
-static void*  dinfo_zalloc(SizeT);
+static void*  dinfo_zalloc(HChar* ec, SizeT);
 static void   dinfo_free(void*);
-static HChar* dinfo_strdup(HChar*);
+static HChar* dinfo_strdup(HChar* ec, HChar*);
 static Bool   is_plausible_guest_addr(Addr);
 static Bool   is_aix5_glink_idiom(Addr);
 
@@ -369,10 +369,10 @@
             the following loop, and complain at that point. */
          continue;
       }
-      spec = dinfo_zalloc(sizeof(Spec));
+      spec = dinfo_zalloc("redir.rnnD.1", sizeof(Spec));
       vg_assert(spec);
-      spec->from_sopatt = dinfo_strdup(demangled_sopatt);
-      spec->from_fnpatt = dinfo_strdup(demangled_fnpatt);
+      spec->from_sopatt = dinfo_strdup("redir.rnnD.2", demangled_sopatt);
+      spec->from_fnpatt = dinfo_strdup("redir.rnnD.3", demangled_fnpatt);
       vg_assert(spec->from_sopatt);
       vg_assert(spec->from_fnpatt);
       spec->to_addr = sym_addr;
@@ -418,7 +418,7 @@
 
    /* Ok.  Now specList holds the list of specs from the DebugInfo. 
       Build a new TopSpec, but don't add it to topSpecs yet. */
-   newts = dinfo_zalloc(sizeof(TopSpec));
+   newts = dinfo_zalloc("redir.rnnD.4", sizeof(TopSpec));
    vg_assert(newts);
    newts->next    = NULL; /* not significant */
    newts->seginfo = newsi;
@@ -691,7 +691,7 @@
 
    /* Traverse the actives, copying the addresses of those we intend
       to delete into tmpSet. */
-   tmpSet = VG_(OSetWord_Create)(dinfo_zalloc, dinfo_free);
+   tmpSet = VG_(OSetWord_Create)(dinfo_zalloc, "redir.rndD.1", dinfo_free);
 
    ts->mark = True;
 
@@ -809,11 +809,11 @@
                                  Addr   to_addr,
                                  const HChar* const mandatory )
 {
-   Spec* spec = dinfo_zalloc(sizeof(Spec));
+   Spec* spec = dinfo_zalloc("redir.ahs.1", sizeof(Spec));
    vg_assert(spec);
 
    if (topSpecs == NULL) {
-      topSpecs = dinfo_zalloc(sizeof(TopSpec));
+      topSpecs = dinfo_zalloc("redir.ahs.2", sizeof(TopSpec));
       vg_assert(topSpecs);
       /* symtab_zalloc sets all fields to zero */
    }
@@ -851,6 +851,7 @@
    activeSet = VG_(OSetGen_Create)(offsetof(Active, from_addr),
                                    NULL,     // Use fast comparison
                                    dinfo_zalloc,
+                                   "redir.ri.1", 
                                    dinfo_free);
 
    // The rest of this function just adds initial Specs.   
@@ -970,10 +971,10 @@
 /*--- MISC HELPERS                                         ---*/
 /*------------------------------------------------------------*/
 
-static void* dinfo_zalloc(SizeT n) {
+static void* dinfo_zalloc(HChar* ec, SizeT n) {
    void* p;
    vg_assert(n > 0);
-   p = VG_(arena_malloc)(VG_AR_DINFO, n);
+   p = VG_(arena_malloc)(VG_AR_DINFO, ec, n);
    tl_assert(p);
    VG_(memset)(p, 0, n);
    return p;
@@ -984,9 +985,9 @@
    return VG_(arena_free)(VG_AR_DINFO, p);
 }
 
-static HChar* dinfo_strdup(HChar* str)
+static HChar* dinfo_strdup(HChar* ec, HChar* str)
 {
-   return VG_(arena_strdup)(VG_AR_DINFO, str);
+   return VG_(arena_strdup)(VG_AR_DINFO, ec, str);
 }
 
 /* Really this should be merged with translations_allowable_from_seg
diff --git a/coregrind/m_replacemalloc/replacemalloc_core.c b/coregrind/m_replacemalloc/replacemalloc_core.c
index d7e2e23..175402b 100644
--- a/coregrind/m_replacemalloc/replacemalloc_core.c
+++ b/coregrind/m_replacemalloc/replacemalloc_core.c
@@ -98,9 +98,11 @@
    // 'align' should be valid (ie. big enough and a power of two) by now.
    // VG_(arena_memalign)() will abort if it's not.
    if (VG_MIN_MALLOC_SZB == align)
-      return VG_(arena_malloc)   ( VG_AR_CLIENT, nbytes ); 
+      return VG_(arena_malloc)   ( VG_AR_CLIENT, "replacemalloc.cm.1", 
+                                   nbytes ); 
    else                                                                       
-      return VG_(arena_memalign) ( VG_AR_CLIENT, align, nbytes );
+      return VG_(arena_memalign) ( VG_AR_CLIENT, "replacemalloc.cm.2", 
+                                   align, nbytes );
 }                                                                             
 
 void VG_(cli_free) ( void* p )                                   
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index 370c59a..bbfdf8b 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -1528,7 +1528,8 @@
    block_all_host_signals(&savedmask);
 
    if (tst->sig_queue == NULL) {
-      tst->sig_queue = VG_(arena_malloc)(VG_AR_CORE, sizeof(*tst->sig_queue));
+      tst->sig_queue = VG_(arena_malloc)(VG_AR_CORE, "signals.qs.1",
+                                         sizeof(*tst->sig_queue));
       VG_(memset)(tst->sig_queue, 0, sizeof(*tst->sig_queue));
    }
    sq = tst->sig_queue;
diff --git a/coregrind/m_stacks.c b/coregrind/m_stacks.c
index 1cba31a..47ac0bd 100644
--- a/coregrind/m_stacks.c
+++ b/coregrind/m_stacks.c
@@ -185,7 +185,7 @@
       start = t;
    }
 
-   i = (Stack *)VG_(arena_malloc)(VG_AR_CORE, sizeof(Stack));
+   i = (Stack *)VG_(arena_malloc)(VG_AR_CORE, "stacks.rs.1", sizeof(Stack));
    i->start = start;
    i->end = end;
    i->id = next_id++;
diff --git a/coregrind/m_syswrap/syswrap-aix5.c b/coregrind/m_syswrap/syswrap-aix5.c
index 8cd4225..9d4db5c 100644
--- a/coregrind/m_syswrap/syswrap-aix5.c
+++ b/coregrind/m_syswrap/syswrap-aix5.c
@@ -125,8 +125,8 @@
    /* Find out how many AixCodeSegChange records we will need, and
       acquire them. */
    changes_size = VG_(am_aix5_reread_procmap_howmany_directives)(); 
-   changes = VG_(arena_malloc)(VG_AR_CORE,
-                                  changes_size * sizeof(AixCodeSegChange));
+   changes = VG_(arena_malloc)(VG_AR_CORE, "syswrap-aix5.arpalou.1",
+                               changes_size * sizeof(AixCodeSegChange));
    vg_assert(changes);
 
    /* Now re-read /proc/<pid>/map and acquire a change set */
@@ -135,24 +135,24 @@
 
    /* And notify all parties of the changes. */
    for (i = 0; i < changes_used; i++) {
-      VG_(di_aix5_notify_segchange)(
-         changes[i].code_start,
-         changes[i].code_len,
-         changes[i].data_start,
-         changes[i].data_len,
-         changes[i].file_name,
-         changes[i].mem_name,
-         changes[i].is_mainexe,
-         changes[i].acquire
-      );
+      ULong di_handle = VG_(di_aix5_notify_segchange)(
+                           changes[i].code_start,
+                           changes[i].code_len,
+                           changes[i].data_start,
+                           changes[i].data_len,
+                           changes[i].file_name,
+                           changes[i].mem_name,
+                           changes[i].is_mainexe,
+                           changes[i].acquire
+                        );
 
       if (changes[i].acquire) {
          VG_TRACK( new_mem_mmap, 
                    changes[i].code_start, changes[i].code_len, 
-                   /*r*/True, /*w*/False, /*x*/True );
+                   /*r*/True, /*w*/False, /*x*/True, di_handle );
          VG_TRACK( new_mem_mmap, 
                    changes[i].data_start, changes[i].data_len, 
-                   /*r*/True, /*w*/True, /*x*/False );
+                   /*r*/True, /*w*/True, /*x*/False, 0/*or di_handle?*/ );
       } else {
          VG_TRACK( die_mem_munmap, 
                    changes[i].code_start, changes[i].code_len );
@@ -962,7 +962,8 @@
             tot_args++;
       }
       // allocate
-      argv = VG_(malloc)( (tot_args+1) * sizeof(HChar*) );
+      argv = VG_(malloc)( "syswrap-aix5.pre_sys_execve.1",
+                          (tot_args+1) * sizeof(HChar*) );
       if (argv == 0) goto hosed;
       // copy
       j = 0;
@@ -1711,7 +1712,7 @@
    Bool r = (prot & VKI_PROT_READ) > 0;
    Bool w = (prot & VKI_PROT_WRITE) > 0;
    Bool x = (prot & VKI_PROT_EXEC) > 0;
-   VG_TRACK( new_mem_mmap, addr, len, r,w,x );
+   VG_TRACK( new_mem_mmap, addr, len, r,w,x, 0/*di_handle*/ );
    Bool d = VG_(am_notify_client_mmap)( addr, len, prot, flags, 
                                         0/*fake fd*/, 0/*fake offset*/);
    if (d) 
@@ -2116,7 +2117,7 @@
 
       /* we don't distinguish whether it's read-only or
        * read-write -- it doesn't matter really. */
-      VG_TRACK( new_mem_mmap, RES, segmentSize, True, True, False );
+      VG_TRACK( new_mem_mmap, RES, segmentSize, True, True, False, 0/*di_handle*/ );
       if (d)
          VG_(discard_translations)( (Addr64)RES, 
                                     (ULong)VG_PGROUNDUP(segmentSize),
diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c
index 51ab4fe..0d36747 100644
--- a/coregrind/m_syswrap/syswrap-generic.c
+++ b/coregrind/m_syswrap/syswrap-generic.c
@@ -65,7 +65,8 @@
 void notify_aspacem_of_mmap(Addr a, SizeT len, UInt prot,
                             UInt flags, Int fd, Off64T offset);
 static
-void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, Off64T offset);
+void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, Off64T offset,
+                         ULong di_handle);
 
 
 /* Returns True iff address range is something the client can
@@ -151,13 +152,22 @@
 /* When a client mmap has been successfully done, this function must
    be called.  It notifies both aspacem and the tool of the new
    mapping.
-*/
+
+   JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
+   it is called from is POST(sys_io_setup).  In particular,
+   ML_(generic_PRE_sys_mmap), further down in this file, is the
+   "normal case" handler for client mmap.  But it doesn't call this
+   function; instead it does the relevant notifications itself.  Here,
+   we just pass di_handle=0 to notify_tool_of_mmap as we have no
+   better information.  But really this function should be done away
+   with; problem is I don't understand what POST(sys_io_setup) does or
+   how it works. */
 void 
 ML_(notify_aspacem_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot, 
                                        UInt flags, Int fd, Off64T offset )
 {
    notify_aspacem_of_mmap(a, len, prot, flags, fd, offset);
-   notify_tool_of_mmap(a, len, prot, offset);
+   notify_tool_of_mmap(a, len, prot, offset, 0/*di_handle*/);
 }
 
 static
@@ -179,7 +189,8 @@
 }
 
 static
-void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, Off64T offset)
+void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, Off64T offset,
+                         ULong di_handle)
 {
    Bool rr, ww, xx;
 
@@ -192,7 +203,7 @@
    ww = toBool(prot & VKI_PROT_WRITE);
    xx = toBool(prot & VKI_PROT_EXEC);
 
-   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx );
+   VG_TRACK( new_mem_mmap, a, len, rr, ww, xx, di_handle );
 }
 
 /* Expand (or shrink) an existing mapping, potentially moving it at
@@ -332,7 +343,8 @@
                                    MIN_SIZET(old_len,new_len) );
          if (new_len > old_len)
             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
-                      old_seg->hasR, old_seg->hasW, old_seg->hasX );
+                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
+                      0/*di_handle*/ );
          VG_TRACK(die_mem_munmap, old_addr, old_len);
          if (d) {
             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
@@ -375,7 +387,8 @@
       if (ok) {
          VG_TRACK( new_mem_mmap, needA, needL, 
                                  old_seg->hasR, 
-                                 old_seg->hasW, old_seg->hasX );
+                                 old_seg->hasW, old_seg->hasX,
+                                 0/*di_handle*/ );
          if (d) 
             VG_(discard_translations)( needA, needL, "do_remap(3)" );
          return VG_(mk_SysRes_Success)( old_addr );
@@ -395,7 +408,8 @@
                                    MIN_SIZET(old_len,new_len) );
          if (new_len > old_len)
             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
-                      old_seg->hasR, old_seg->hasW, old_seg->hasX );
+                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
+                      0/*di_handle*/ );
          VG_TRACK(die_mem_munmap, old_addr, old_len);
          if (d) {
             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
@@ -434,7 +448,8 @@
    if (!ok)
       goto eNOMEM;
    VG_TRACK( new_mem_mmap, needA, needL, 
-                           old_seg->hasR, old_seg->hasW, old_seg->hasX );
+                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
+                           0/*di_handle*/ );
    if (d)
       VG_(discard_translations)( needA, needL, "do_remap(6)" );
    return VG_(mk_SysRes_Success)( old_addr );
@@ -539,7 +554,7 @@
 
    /* Not already one: allocate an OpenFd */
    if (i == NULL) {
-      i = VG_(arena_malloc)(VG_AR_CORE, sizeof(OpenFd));
+      i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
 
       i->prev = NULL;
       i->next = allocated_fds;
@@ -549,7 +564,7 @@
    }
 
    i->fd = fd;
-   i->pathname = VG_(arena_strdup)(VG_AR_CORE, pathname);
+   i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
 }
 
@@ -752,10 +767,10 @@
 }
 
 static
-Char *strdupcat ( const Char *s1, const Char *s2, ArenaId aid )
+Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
 {
    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
-   Char *result = VG_(arena_malloc) ( aid, len );
+   Char *result = VG_(arena_malloc) ( aid, cc, len );
    VG_(strcpy) ( result, s1 );
    VG_(strcat) ( result, s2 );
    return result;
@@ -765,7 +780,8 @@
 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
                             Char *msg, Addr base, SizeT size )
 {
-   Char *outmsg = strdupcat ( "socketcall.sendmsg", msg, VG_AR_CORE );
+   Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
+                              "socketcall.sendmsg", msg, VG_AR_CORE );
    PRE_MEM_READ( outmsg, base, size );
    VG_(arena_free) ( VG_AR_CORE, outmsg );
 }
@@ -774,7 +790,8 @@
 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
                              Char *msg, Addr base, SizeT size )
 {
-   Char *outmsg = strdupcat ( "socketcall.recvmsg", msg, VG_AR_CORE );
+   Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
+                              "socketcall.recvmsg", msg, VG_AR_CORE );
    if ( read )
       PRE_MEM_READ( outmsg, base, size );
    else
@@ -866,7 +883,7 @@
    /* NULL/zero-length sockaddrs are legal */
    if ( sa == NULL || salen == 0 ) return;
 
-   outmsg = VG_(arena_malloc) ( VG_AR_CORE,
+   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
                                 VG_(strlen)( description ) + 30 );
 
    VG_(sprintf) ( outmsg, description, ".sa_family" );
@@ -1722,7 +1739,8 @@
 
       /* we don't distinguish whether it's read-only or
        * read-write -- it doesn't matter really. */
-      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False );
+      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
+                              0/*di_handle*/ );
       if (d)
          VG_(discard_translations)( (Addr64)res, 
                                     (ULong)VG_PGROUNDUP(segmentSize),
@@ -1937,6 +1955,7 @@
    }
 
    if (!sres.isError) {
+      ULong di_handle;
       /* Notify aspacem. */
       notify_aspacem_of_mmap(
          (Addr)sres.res, /* addr kernel actually assigned */
@@ -1947,13 +1966,15 @@
          arg6  /* offset */
       );
       /* Load symbols? */
-      VG_(di_notify_mmap)( (Addr)sres.res, False/*allow_SkFileV*/ );
+      di_handle = VG_(di_notify_mmap)( (Addr)sres.res, False/*allow_SkFileV*/ );
       /* Notify the tool. */
       notify_tool_of_mmap(
          (Addr)sres.res, /* addr kernel actually assigned */
          arg2, /* length */
          arg3, /* prot */
-         arg6  /* offset */
+         arg6, /* offset */
+         di_handle /* so the tool can refer to the read debuginfo later,
+                      if it wants. */
       );
    }
 
@@ -2553,7 +2574,8 @@
             tot_args++;
       }
       // allocate
-      argv = VG_(malloc)( (tot_args+1) * sizeof(HChar*) );
+      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
+                          (tot_args+1) * sizeof(HChar*) );
       if (argv == 0) goto hosed;
       // copy
       j = 0;
diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c
index 3eb077a..8c5351a 100644
--- a/coregrind/m_syswrap/syswrap-x86-linux.c
+++ b/coregrind/m_syswrap/syswrap-x86-linux.c
@@ -444,14 +444,14 @@
 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
 {
    Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
-   return VG_(arena_calloc)(VG_AR_CORE, nbytes, 1);
+   return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxG.1", nbytes, 1);
 }
 
 /* Create a zeroed-out LDT. */
 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
 {
    Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
-   return VG_(arena_calloc)(VG_AR_CORE, nbytes, 1);
+   return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxL.1", nbytes, 1);
 }
 
 /* Free up an LDT or GDT allocated by the above fns. */
diff --git a/coregrind/m_tooliface.c b/coregrind/m_tooliface.c
index bd0af0a..cc4ac1d 100644
--- a/coregrind/m_tooliface.c
+++ b/coregrind/m_tooliface.c
@@ -340,10 +340,10 @@
    VG_(tdict).fn = f; \
 }
 
-DEF0(track_new_mem_startup,       Addr, SizeT, Bool, Bool, Bool)
+DEF0(track_new_mem_startup,       Addr, SizeT, Bool, Bool, Bool, ULong)
 DEF0(track_new_mem_stack_signal,  Addr, SizeT, UInt)
 DEF0(track_new_mem_brk,           Addr, SizeT, UInt)
-DEF0(track_new_mem_mmap,          Addr, SizeT, Bool, Bool, Bool)
+DEF0(track_new_mem_mmap,          Addr, SizeT, Bool, Bool, Bool, ULong)
 
 DEF0(track_copy_mem_remap,        Addr, Addr, SizeT)
 DEF0(track_change_mem_mprotect,   Addr, SizeT, Bool, Bool, Bool)
diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c
index 8d0d35b..c051ae7 100644
--- a/coregrind/m_transtab.c
+++ b/coregrind/m_transtab.c
@@ -384,7 +384,8 @@
       old_sz = sec->ec2tte_size[ec];
       old_ar = sec->ec2tte[ec];
       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
-      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort));
+      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
+                                 new_sz * sizeof(UShort));
       for (i = 0; i < old_sz; i++)
          new_ar[i] = old_ar[i];
       if (old_ar)
diff --git a/coregrind/m_ume.c b/coregrind/m_ume.c
index bdfcfb8..93d79a9 100644
--- a/coregrind/m_ume.c
+++ b/coregrind/m_ume.c
@@ -120,7 +120,7 @@
 struct elfinfo *readelf(Int fd, const char *filename)
 {
    SysRes sres;
-   struct elfinfo *e = VG_(malloc)(sizeof(*e));
+   struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
    Int phsz;
 
    vg_assert(e);
@@ -163,7 +163,7 @@
    }
 
    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
-   e->p = VG_(malloc)(phsz);
+   e->p = VG_(malloc)("ume.re.2", phsz);
    vg_assert(e->p);
 
    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
@@ -355,7 +355,15 @@
    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
       duplicate this behavior as close as we can. */
    if (e->e.e_type == ET_DYN && ebase == 0) {
-      ebase = VG_PGROUNDDN(info->exe_base + (info->exe_end - info->exe_base) * 2 / 3);
+      ebase = VG_PGROUNDDN(info->exe_base 
+                           + (info->exe_end - info->exe_base) * 2 / 3);
+      /* We really don't want to load PIEs at or near address zero.  It
+         works, but it's not robust (NULL pointer reads and writes
+         become legit, which is really bad) and causes problems for
+         exp-ptrcheck, which assumes all numbers below 1MB are
+         nonpointers.  So, hackily, move it up to at least 1MB. */
+      if (ebase < 0x100000)
+         ebase = 0x100000;
    }
 
    info->phnum = e->e.e_phnum;
@@ -378,7 +386,7 @@
 	 break;
 			
       case PT_INTERP: {
-	 char *buf = VG_(malloc)(ph->p_filesz+1);
+         HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
 	 Int j;
 	 Int intfd;
 	 Int baseaddr_set;
@@ -613,10 +621,10 @@
       *cp = '\0';
    }
    
-   info->interp_name = VG_(strdup)(interp);
+   info->interp_name = VG_(strdup)("ume.ls.1", interp);
    vg_assert(NULL != info->interp_name);
    if (arg != NULL && *arg != '\0') {
-      info->interp_args = VG_(strdup)(arg);
+      info->interp_args = VG_(strdup)("ume.ls.2", arg);
       vg_assert(NULL != info->interp_args);
    }
 
@@ -788,7 +796,7 @@
       // Looks like a script.  Run it with /bin/sh.  This includes
       // zero-length files.
 
-      info->interp_name = VG_(strdup)(default_interp_name);
+      info->interp_name = VG_(strdup)("ume.desf.1", default_interp_name);
       info->interp_args = NULL;
       if (info->argv && info->argv[0] != NULL)
          info->argv[0] = (char *)exe_name;
diff --git a/coregrind/m_wordfm.c b/coregrind/m_wordfm.c
index d00fac7..c43908e 100644
--- a/coregrind/m_wordfm.c
+++ b/coregrind/m_wordfm.c
@@ -81,7 +81,8 @@
 
 struct _WordFM {
    AvlNode* root;
-   void*    (*alloc_nofail)( SizeT );
+   void*    (*alloc_nofail)( HChar*, SizeT );
+   HChar*   cc;
    void     (*dealloc)(void*);
    Word     (*kCmp)(UWord,UWord);
    AvlNode* nodeStack[WFM_STKMAX]; // Iterator node stack
@@ -415,6 +416,39 @@
    }
 }
 
+static
+Bool avl_find_bounds ( AvlNode* t, 
+                       /*OUT*/UWord* kMinP, /*OUT*/UWord* kMaxP,
+                       UWord minKey, UWord maxKey, UWord key,
+                       Word(*kCmp)(UWord,UWord) )
+{
+   UWord lowerBound = minKey;
+   UWord upperBound = maxKey;
+   while (t) {
+      Word cmpresS = kCmp ? kCmp(t->key, key)
+                          : cmp_unsigned_Words(t->key, key);
+      if (cmpresS < 0) {
+         lowerBound = t->key;
+         t = t->child[1];
+         continue;
+      }
+      if (cmpresS > 0) {
+         upperBound = t->key;
+         t = t->child[0];
+         continue;
+      }
+      /* We should never get here.  If we do, it means the given key
+         is actually present in the tree, which means the original
+         call was invalid -- an error on the caller's part -- so we
+         return False and leave *kMinP and *kMaxP unwritten.  (Bounds
+         could be computed in this case, but we deliberately don't.) */
+      return False;
+   }
+   *kMinP = lowerBound;
+   *kMaxP = upperBound;
+   return True;
+}
+
 // Clear the iterator stack.
 static void stackClear(WordFM* fm)
 {
@@ -459,12 +493,13 @@
 AvlNode* avl_dopy ( AvlNode* nd, 
                     UWord(*dopyK)(UWord), 
                     UWord(*dopyV)(UWord),
-                    void*(alloc_nofail)(SizeT) )
+                    void*(alloc_nofail)(HChar*,SizeT),
+                    HChar* cc )
 {
    AvlNode* nyu;
    if (! nd)
       return NULL;
-   nyu = alloc_nofail(sizeof(AvlNode));
+   nyu = alloc_nofail(cc, sizeof(AvlNode));
    tl_assert(nyu);
    
    nyu->child[0] = nd->child[0];
@@ -493,12 +528,14 @@
 
    /* Copy subtrees */
    if (nyu->child[0]) {
-      nyu->child[0] = avl_dopy( nyu->child[0], dopyK, dopyV, alloc_nofail );
+      nyu->child[0] = avl_dopy( nyu->child[0], dopyK, dopyV, 
+                                alloc_nofail, cc );
       if (! nyu->child[0])
          return NULL;
    }
    if (nyu->child[1]) {
-      nyu->child[1] = avl_dopy( nyu->child[1], dopyK, dopyV, alloc_nofail );
+      nyu->child[1] = avl_dopy( nyu->child[1], dopyK, dopyV,
+                                alloc_nofail, cc );
       if (! nyu->child[1])
          return NULL;
    }
@@ -508,13 +545,15 @@
 
 /* Initialise a WordFM. */
 static void initFM ( WordFM* fm,
-                     void*   (*alloc_nofail)( SizeT ),
+                     void*   (*alloc_nofail)( HChar*, SizeT ),
+                     HChar*  cc,
                      void    (*dealloc)(void*),
                      Word    (*kCmp)(UWord,UWord) )
 {
    fm->root         = 0;
    fm->kCmp         = kCmp;
    fm->alloc_nofail = alloc_nofail;
+   fm->cc           = cc;
    fm->dealloc      = dealloc;
    fm->stackTop     = 0;
 }
@@ -528,13 +567,14 @@
    sections of the map, or the whole thing.  If kCmp is NULL then the
    ordering used is unsigned word ordering (UWord) on the key
    values. */
-WordFM* VG_(newFM) ( void* (*alloc_nofail)( SizeT ),
+WordFM* VG_(newFM) ( void* (*alloc_nofail)( HChar*, SizeT ),
+                     HChar* cc,
                      void  (*dealloc)(void*),
                      Word  (*kCmp)(UWord,UWord) )
 {
-   WordFM* fm = alloc_nofail(sizeof(WordFM));
+   WordFM* fm = alloc_nofail(cc, sizeof(WordFM));
    tl_assert(fm);
-   initFM(fm, alloc_nofail, dealloc, kCmp);
+   initFM(fm, alloc_nofail, cc, dealloc, kCmp);
    return fm;
 }
 
@@ -568,11 +608,11 @@
 }
 
 /* Add (k,v) to fm. */
-void VG_(addToFM) ( WordFM* fm, UWord k, UWord v )
+Bool VG_(addToFM) ( WordFM* fm, UWord k, UWord v )
 {
    MaybeWord oldV;
    AvlNode* node;
-   node = fm->alloc_nofail( sizeof(struct _AvlNode) );
+   node = fm->alloc_nofail( fm->cc, sizeof(struct _AvlNode) );
    node->key = k;
    node->val = v;
    oldV.b = False;
@@ -582,6 +622,7 @@
    //   fm->vFin( oldV.w );
    if (oldV.b)
       fm->dealloc(node);
+   return oldV.b;
 }
 
 // Delete key from fm, returning associated key and val if found
@@ -618,6 +659,15 @@
    }
 }
 
+// See comment in pub_tool_wordfm.h for explanation
+Bool VG_(findBoundsFM)( WordFM* fm,
+                        /*OUT*/UWord* kMinP, /*OUT*/UWord* kMaxP,
+                        UWord minKey, UWord maxKey, UWord key )
+{
+   return avl_find_bounds( fm->root, kMinP, kMaxP, minKey, maxKey,
+                                     key, fm->kCmp );
+}
+
 UWord VG_(sizeFM) ( WordFM* fm )
 {
    // Hmm, this is a bad way to do this
@@ -735,7 +785,7 @@
    /* can't clone the fm whilst iterating on it */
    tl_assert(fm->stackTop == 0);
 
-   nyu = fm->alloc_nofail( sizeof(WordFM) );
+   nyu = fm->alloc_nofail( fm->cc, sizeof(WordFM) );
    tl_assert(nyu);
 
    *nyu = *fm;
@@ -745,7 +795,8 @@
    VG_(memset)(fm->numStack, 0,  sizeof(fm->numStack));
 
    if (nyu->root) {
-      nyu->root = avl_dopy( nyu->root, dopyK, dopyV, fm->alloc_nofail );
+      nyu->root = avl_dopy( nyu->root, dopyK, dopyV,
+                            fm->alloc_nofail, fm->cc );
       if (! nyu->root)
          return NULL;
    }
@@ -768,11 +819,18 @@
    WordFM* fm; 
 };
 
-WordBag* VG_(newBag) ( void* (*alloc_nofail)( SizeT ),
+/* Create a new WordBag.  The bag header and its underlying WordFM are
+   both obtained from alloc_nofail, passing cost-centre tag 'cc' to each
+   allocation; 'dealloc' is handed on to the WordFM.  The WordFM is
+   created with a NULL kCmp, so keys are ordered as unsigned words. */
+WordBag* VG_(newBag) ( void* (*alloc_nofail)( HChar*, SizeT ),
+                       HChar* cc,
                        void  (*dealloc)(void*) )
 {
-   WordBag* bag = alloc_nofail(sizeof(WordBag));
-   bag->fm = VG_(newFM)( alloc_nofail, dealloc, NULL );
+   WordBag* bag = alloc_nofail(cc, sizeof(WordBag));
+   /* Check the allocation before use, as VG_(newFM) does. */
+   tl_assert(bag);
+   bag->fm = VG_(newFM)( alloc_nofail, cc, dealloc, NULL );
    return bag;
 }
 
diff --git a/coregrind/m_xarray.c b/coregrind/m_xarray.c
index 1b85994..8f749d2 100644
--- a/coregrind/m_xarray.c
+++ b/coregrind/m_xarray.c
@@ -38,7 +38,8 @@
 /* See pub_tool_xarray.h for details of what this is all about. */
 
 struct _XArray {
-   void* (*alloc) ( SizeT );        /* alloc fn (nofail) */
+   void* (*alloc) ( HChar*, SizeT ); /* alloc fn (nofail) */
+   HChar* cc;                       /* cost centre for alloc */
    void  (*free) ( void* );         /* free fn */
    Int   (*cmpFn) ( void*, void* ); /* cmp fn (may be NULL) */
    Word  elemSzB;   /* element size in bytes */
@@ -49,7 +50,8 @@
 };
 
 
-XArray* VG_(newXA) ( void*(*alloc_fn)(SizeT), 
+XArray* VG_(newXA) ( void*(*alloc_fn)(HChar*,SizeT), 
+                     HChar* cc,
                      void(*free_fn)(void*),
                      Word elemSzB )
 {
@@ -63,9 +65,10 @@
    vg_assert(alloc_fn);
    vg_assert(free_fn);
    vg_assert(elemSzB > 0);
-   xa = alloc_fn( sizeof(struct _XArray) );
+   xa = alloc_fn( cc, sizeof(struct _XArray) );
    vg_assert(xa);
    xa->alloc     = alloc_fn;
+   xa->cc        = cc;
    xa->free      = free_fn;
    xa->cmpFn     = NULL;
    xa->elemSzB   = elemSzB;
@@ -76,19 +79,22 @@
    return xa;
 }
 
-XArray* VG_(cloneXA)( XArray* xao )
+XArray* VG_(cloneXA)( HChar* cc, XArray* xao )
 {
    struct _XArray* xa = (struct _XArray*)xao;
    struct _XArray* nyu;
+   HChar* nyu_cc;
    vg_assert(xa);
    vg_assert(xa->alloc);
    vg_assert(xa->free);
    vg_assert(xa->elemSzB >= 1);
-   nyu = xa->alloc( sizeof(struct _XArray) );
+   nyu_cc = cc ? cc : xa->cc;
+   nyu = xa->alloc( nyu_cc, sizeof(struct _XArray) );
    if (!nyu)
       return NULL;
    /* Copy everything verbatim ... */
    *nyu = *xa;
+   nyu->cc = nyu_cc;
    /* ... except we have to clone the contents-array */
    if (nyu->arr) {
       /* Restrict the total size of the new array to its current
@@ -98,7 +104,7 @@
          element is later added to it, unfortunately. */
       nyu->totsizeE = nyu->usedsizeE;
       /* and allocate .. */
-      nyu->arr = nyu->alloc( nyu->totsizeE * nyu->elemSzB );
+      nyu->arr = nyu->alloc( nyu->cc, nyu->totsizeE * nyu->elemSzB );
       if (!nyu->arr) {
          nyu->free(nyu);
          return NULL;
@@ -161,7 +167,7 @@
       if (0 && xa->totsizeE >= 10000) 
          VG_(printf)("addToXA: increasing from %ld to %ld\n", 
                      xa->totsizeE, newsz);
-      tmp = xa->alloc(newsz * xa->elemSzB);
+      tmp = xa->alloc(xa->cc, newsz * xa->elemSzB);
       vg_assert(tmp);
       if (xa->usedsizeE > 0) 
          VG_(memcpy)(tmp, xa->arr, xa->usedsizeE * xa->elemSzB);
diff --git a/coregrind/pub_core_clientstate.h b/coregrind/pub_core_clientstate.h
index 07f4ade..6d1993d 100644
--- a/coregrind/pub_core_clientstate.h
+++ b/coregrind/pub_core_clientstate.h
@@ -42,9 +42,6 @@
 
 // Address space globals
 
-extern Addr  VG_(client_base);	 // client address space limits
-extern Addr  VG_(client_end);
-
 extern Addr  VG_(clstk_base);	 // client stack range
 extern Addr  VG_(clstk_end);
 extern UWord VG_(clstk_id);      // client stack id
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h
index 3e65bde..3965a71 100644
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -45,9 +45,16 @@
    allow_SkFileV is True, it will try load debug info if the mapping
    at 'a' belongs to Valgrind; whereas normally (False) it will not do
    that.  This allows us to carefully control when the thing will read
-   symbols from the Valgrind executable itself. */
+   symbols from the Valgrind executable itself.
+
+   If a call to VG_(di_notify_mmap) causes debug info to be read, then
+   the returned ULong is an abstract handle which can later be used to
+   refer to the debuginfo read as a result of this specific mapping,
+   in later queries to m_debuginfo.  In this case the handle value
+   will be one or above.  If the returned value is zero, no debug info
+   was read. */
 #if defined(VGO_linux)
-extern void VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV );
+extern ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV );
 
 extern void VG_(di_notify_munmap)( Addr a, SizeT len );
 
@@ -59,19 +66,24 @@
    parameters describe a code segment and its associated data segment,
    that have recently been mapped in -- so we need to read debug info
    for it -- or conversely, have recently been dumped, in which case
-   the relevant debug info has to be unloaded. */
-extern void VG_(di_aix5_notify_segchange)( 
-               Addr   code_start,
-               Word   code_len,
-               Addr   data_start,
-               Word   data_len,
-               UChar* file_name,
-               UChar* mem_name,
-               Bool   is_mainexe,
-               Bool   acquire
-            );
+   the relevant debug info has to be unloaded.
+
+   The returned ULong has the same meaning as documented for
+   VG_(di_notify_mmap) just above. */
+extern ULong VG_(di_aix5_notify_segchange)( 
+                Addr   code_start,
+                Word   code_len,
+                Addr   data_start,
+                Word   data_len,
+                UChar* file_name,
+                UChar* mem_name,
+                Bool   is_mainexe,
+                Bool   acquire
+             );
 #endif
 
+extern void VG_(di_discard_ALL_debuginfo)( void );
+
 extern Bool VG_(get_fnname_nodemangle)( Addr a, 
                                         Char* fnname, Int n_fnname );
 
diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h
index f643862..510a22f 100644
--- a/coregrind/pub_core_mallocfree.h
+++ b/coregrind/pub_core_mallocfree.h
@@ -86,14 +86,16 @@
    int keepcost; /* top-most, releasable (via malloc_trim) space */
 };
 
-extern void* VG_(arena_malloc)  ( ArenaId arena, SizeT nbytes );
+extern void* VG_(arena_malloc)  ( ArenaId arena, HChar* cc, SizeT nbytes );
 extern void  VG_(arena_free)    ( ArenaId arena, void* ptr );
-extern void* VG_(arena_calloc)  ( ArenaId arena, 
+extern void* VG_(arena_calloc)  ( ArenaId arena, HChar* cc,
                                   SizeT nmemb, SizeT bytes_per_memb );
-extern void* VG_(arena_realloc) ( ArenaId arena, void* ptr, SizeT size );
-extern void* VG_(arena_memalign)( ArenaId aid, SizeT req_alignB, 
-                                               SizeT req_pszB );
-extern Char* VG_(arena_strdup)  ( ArenaId aid, const Char* s);
+extern void* VG_(arena_realloc) ( ArenaId arena, HChar* cc,
+                                  void* ptr, SizeT size );
+extern void* VG_(arena_memalign)( ArenaId aid, HChar* cc,
+                                  SizeT req_alignB, SizeT req_pszB );
+extern Char* VG_(arena_strdup)  ( ArenaId aid, HChar* cc, 
+                                  const Char* s);
 
 // Nb: The ThreadId doesn't matter, it's not used.
 extern SizeT VG_(arena_payload_szB) ( ThreadId tid, ArenaId aid, void* payload );
@@ -104,6 +106,8 @@
 
 extern void  VG_(print_all_arena_stats) ( void );
 
+extern void  VG_(print_arena_cc_analysis) ( void );
+
 #endif   // __PUB_CORE_MALLOCFREE_H
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h
index 8e3d85c..457dce5 100644
--- a/coregrind/pub_core_options.h
+++ b/coregrind/pub_core_options.h
@@ -126,10 +126,10 @@
 extern Bool  VG_(clo_trace_redir);
 /* DEBUG: print thread scheduling events?  default: NO */
 extern Bool  VG_(clo_trace_sched);
-/* DEBUG: print pthreads calls?  default: NO */
-extern Bool  VG_(clo_trace_pthreads);
-/* Display gory details for the k'th most popular error.  default:
-   Infinity. */
+/* DEBUG: do heap profiling?  default: NO */
+extern Bool  VG_(clo_profile_heap);
+/* DEBUG: display gory details for the k'th most popular error.
+   default: Infinity. */
 extern Int   VG_(clo_dump_error);
 /* Engage miscellaneous weird hacks needed for some progs. */
 extern Char* VG_(clo_sim_hints);
diff --git a/coregrind/pub_core_tooliface.h b/coregrind/pub_core_tooliface.h
index 31ac7d6..ca6494b 100644
--- a/coregrind/pub_core_tooliface.h
+++ b/coregrind/pub_core_tooliface.h
@@ -161,10 +161,10 @@
    IRSB* (*tool_final_IR_tidy_pass)  (IRSB*);
 
    // -- Event tracking functions ------------------------------------
-   void (*track_new_mem_startup)     (Addr, SizeT, Bool, Bool, Bool);
+   void (*track_new_mem_startup)     (Addr, SizeT, Bool, Bool, Bool, ULong);
    void (*track_new_mem_stack_signal)(Addr, SizeT, ThreadId);
    void (*track_new_mem_brk)         (Addr, SizeT, ThreadId);
-   void (*track_new_mem_mmap)        (Addr, SizeT, Bool, Bool, Bool);
+   void (*track_new_mem_mmap)        (Addr, SizeT, Bool, Bool, Bool, ULong);
 
    void (*track_copy_mem_remap)      (Addr src, Addr dst, SizeT);
    void (*track_change_mem_mprotect) (Addr, SizeT, Bool, Bool, Bool);
diff --git a/coregrind/pub_core_vkiscnums.h b/coregrind/pub_core_vkiscnums.h
index 80eb53c..1df5042 100644
--- a/coregrind/pub_core_vkiscnums.h
+++ b/coregrind/pub_core_vkiscnums.h
@@ -51,9 +51,6 @@
 /* Bind the given syscall name to the given number.  Returns True if
    successful, False if the name is unknown. */
 extern Bool VG_(aix5_register_syscall)( Int, UChar* );
-/* Look up in said binding later, for the purposes of making error
-   messages. */
-extern UChar* VG_(aix5_sysno_to_sysname)( Int sysno );
 #endif
 
 #endif /* !defined(VG_IN_ASSEMBLY_SOURCE) */
diff --git a/drd/drd_barrier.c b/drd/drd_barrier.c
index 0f2d4db..542c610 100644
--- a/drd/drd_barrier.c
+++ b/drd/drd_barrier.c
@@ -112,7 +112,8 @@
   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
   tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
             >= sizeof(DrdThreadId));
-  p->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), VG_(free));
+  p->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
+                                      VG_(free));
 }
 
 /** Deallocate the memory allocated by barrier_initialize() and in p->oset. 
diff --git a/drd/drd_bitmap.c b/drd/drd_bitmap.c
index c02bd49..9e5d1ba 100644
--- a/drd/drd_bitmap.c
+++ b/drd/drd_bitmap.c
@@ -63,7 +63,7 @@
   /* in drd_bitmap.h.                                                    */
   tl_assert((1 << BITS_PER_BITS_PER_UWORD) == BITS_PER_UWORD);
 
-  bm = VG_(malloc)(sizeof(*bm));
+  bm = VG_(malloc)("drd.bitmap.bn.1", sizeof(*bm));
   tl_assert(bm);
   /* Cache initialization. a1 is initialized with a value that never can */
   /* match any valid address: the upper ADDR0_BITS bits of a1 are always */
@@ -73,7 +73,8 @@
     bm->cache[i].a1  = ~(UWord)1;
     bm->cache[i].bm2 = 0;
   }
-  bm->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), VG_(free));
+  bm->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.bitmap.bn.2",
+                                       VG_(free));
 
   s_bitmap_creation_count++;
 
@@ -917,7 +918,7 @@
 {
   struct bitmap2* bm2;
 
-  bm2 = VG_(malloc)(sizeof(*bm2));
+  bm2 = VG_(malloc)("drd.bitmap.bm2n.1", sizeof(*bm2));
   bm2->addr   = a1;
   bm2->refcnt = 1;
 
diff --git a/drd/drd_clientobj.c b/drd/drd_clientobj.c
index 1d62db1..768079a 100644
--- a/drd/drd_clientobj.c
+++ b/drd/drd_clientobj.c
@@ -53,7 +53,8 @@
 void clientobj_init(void)
 {
   tl_assert(s_clientobj == 0);
-  s_clientobj = VG_(OSetGen_Create)(0, 0, VG_(malloc), VG_(free));
+  s_clientobj = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.clientobj.ci.1",
+                                          VG_(free));
   tl_assert(s_clientobj);
 }
 
diff --git a/drd/drd_error.c b/drd/drd_error.c
index a8c0052..c3eb745 100644
--- a/drd/drd_error.c
+++ b/drd/drd_error.c
@@ -95,8 +95,8 @@
 {
   AddrInfo ai;
   const unsigned descr_size = 256;
-  Char* descr1 = VG_(malloc)(descr_size);
-  Char* descr2 = VG_(malloc)(descr_size);
+  Char* descr1 = VG_(malloc)("drd.error.drdr2.1", descr_size);
+  Char* descr2 = VG_(malloc)("drd.error.drdr2.2", descr_size);
 
   tl_assert(dri);
   tl_assert(dri->addr);
diff --git a/drd/drd_main.c b/drd/drd_main.c
index 3f9ff30..5f49203 100644
--- a/drd/drd_main.c
+++ b/drd/drd_main.c
@@ -548,7 +548,8 @@
 
 static
 void drd_start_using_mem_w_perms(const Addr a, const SizeT len,
-                                 const Bool rr, const Bool ww, const Bool xx)
+                                 const Bool rr, const Bool ww, const Bool xx,
+                                 ULong di_handle)
 {
   thread_set_vg_running_tid(VG_(get_running_tid)());
 
@@ -654,7 +655,7 @@
     const unsigned msg_size = 256;
     char* msg;
 
-    msg = VG_(malloc)(msg_size);
+    msg = VG_(malloc)("drd.main.dptj.1", msg_size);
     tl_assert(msg);
     VG_(snprintf)(msg, msg_size,
                   "drd_post_thread_join joiner = %d/%d, joinee = %d/%d",
diff --git a/drd/drd_malloc_wrappers.c b/drd/drd_malloc_wrappers.c
index e9cbbf8..5a47ed7 100644
--- a/drd/drd_malloc_wrappers.c
+++ b/drd/drd_malloc_wrappers.c
@@ -70,7 +70,8 @@
 static
 DRD_Chunk* create_DRD_Chunk(ThreadId tid, Addr p, SizeT size)
 {
-  DRD_Chunk* mc = VG_(malloc)(sizeof(DRD_Chunk));
+  DRD_Chunk* mc = VG_(malloc)("drd.malloc_wrappers.cDC.1",
+                              sizeof(DRD_Chunk));
   mc->data      = p;
   mc->size      = size;
   mc->where     = VG_(record_ExeContext)(tid, 0);
diff --git a/drd/drd_rwlock.c b/drd/drd_rwlock.c
index 44058b4..c7d0fc6 100644
--- a/drd/drd_rwlock.c
+++ b/drd/drd_rwlock.c
@@ -180,7 +180,8 @@
   tl_assert(p->type == ClientRwlock);
 
   p->cleanup         = (void(*)(DrdClientobj*))&rwlock_cleanup;
-  p->thread_info     = VG_(OSetGen_Create)(0, 0, VG_(malloc), VG_(free));
+  p->thread_info     = VG_(OSetGen_Create)(
+                          0, 0, VG_(malloc), "drd.rwlock.ri.1", VG_(free));
   p->acquiry_time_ms = 0;
   p->acquired_at     = 0;
 }
diff --git a/drd/drd_segment.c b/drd/drd_segment.c
index 8fe1707..7e3022a 100644
--- a/drd/drd_segment.c
+++ b/drd/drd_segment.c
@@ -116,7 +116,7 @@
   if (s_max_alive_segments_count < s_alive_segments_count)
     s_max_alive_segments_count = s_alive_segments_count;
 
-  sg = VG_(malloc)(sizeof(*sg));
+  sg = VG_(malloc)("drd.segment.sn.1", sizeof(*sg));
   tl_assert(sg);
   sg_init(sg, creator, created);
   return sg;
diff --git a/drd/drd_vc.c b/drd/drd_vc.c
index b083974..1d4d833 100644
--- a/drd/drd_vc.c
+++ b/drd/drd_vc.c
@@ -320,11 +320,13 @@
   {
     if (vc->vc)
     {
-      vc->vc = VG_(realloc)(vc->vc, new_capacity * sizeof(vc->vc[0]));
+      vc->vc = VG_(realloc)("drd.vc.vr.1",
+                            vc->vc, new_capacity * sizeof(vc->vc[0]));
     }
     else if (new_capacity > 0)
     {
-      vc->vc = VG_(malloc)(new_capacity * sizeof(vc->vc[0]));
+      vc->vc = VG_(malloc)("drd.vc.vr.2",
+                           new_capacity * sizeof(vc->vc[0]));
     }
     else
     {
diff --git a/drd/tests/drd_bitmap_test.c b/drd/tests/drd_bitmap_test.c
index d2855ad..f653f42 100644
--- a/drd/tests/drd_bitmap_test.c
+++ b/drd/tests/drd_bitmap_test.c
@@ -10,7 +10,7 @@
 
 /* Replacements for core functionality. */
 
-void* VG_(malloc)(SizeT nbytes)
+void* VG_(malloc)(HChar* cc, SizeT nbytes)
 { return malloc(nbytes); }
 void  VG_(free)(void* p)
 { return free(p); }
diff --git a/exp-omega/o_main.c b/exp-omega/o_main.c
index 1551e09..a801168 100644
--- a/exp-omega/o_main.c
+++ b/exp-omega/o_main.c
@@ -399,7 +399,7 @@
       /*
       ** We don't have a node for this address. Create one now.
       */
-      o_lastPBitNode = VG_(malloc)( sizeof(PBitNode) );
+      o_lastPBitNode = VG_(malloc)( "om.ogPBN.1", sizeof(PBitNode) );
       tl_assert(o_lastPBitNode);
       VG_(memset)(o_lastPBitNode, 0, sizeof(PBitNode));
       o_lastPBitNode->hdr.key = key;
@@ -903,7 +903,7 @@
     /*
     ** Create a new block and add it to the leaked list.
     */
-    item = VG_(malloc)(sizeof(BlockRecord));
+    item = VG_(malloc)("om.oaLB.1", sizeof(BlockRecord));
     tl_assert(item);
     
     item->count = 1;
@@ -1288,7 +1288,7 @@
   if(!smb->pointers)
   {
     smb->pointers =
-      VG_(malloc)((smb->refNum + 8) * sizeof(TrackedPointer *));
+      VG_(malloc)("om.oAMBR.1", (smb->refNum + 8) * sizeof(TrackedPointer *));
     tl_assert(smb->pointers);
   }
   else if(!((smb->refNum + 1) & 7))
@@ -1298,7 +1298,8 @@
     ** Note that this will also shrink us if needed.
     */
     smb->pointers =
-      VG_(realloc)(smb->pointers, ((smb->refNum + 8) * sizeof(Addr)));
+      VG_(realloc)("om.oAMBR.2",
+                   smb->pointers, ((smb->refNum + 8) * sizeof(Addr)));
     tl_assert(smb->pointers);
   }
 
@@ -1728,7 +1729,7 @@
       /*
       ** Create a new shadow for the block.
       */
-      smb = VG_(malloc)( sizeof(MemBlock) );
+      smb = VG_(malloc)( "om.osuS.1", sizeof(MemBlock) );
       tl_assert(smb);
 
       o_stats.shadowMemoryBlocksAllocated++;
@@ -1905,7 +1906,7 @@
     */
     TrackedPointer *tp = VG_(HT_lookup)(o_TrackedPointers, TRACKED_KEY(address));
     Int diff           = dst - src;
-    TrackedPointer *ntp = VG_(malloc)((sizeof(TrackedPointer)));
+    TrackedPointer *ntp = VG_(malloc)("om.odTP.1", (sizeof(TrackedPointer)));
     MemBlock       *mb = NULL;
     
     tl_assert(tp);
@@ -1946,7 +1947,7 @@
 
 static void o_createMemBlock(ThreadId tid, Addr start, SizeT size)
 {
-  MemBlock *mb = VG_(malloc)(sizeof(MemBlock));
+  MemBlock *mb = VG_(malloc)("om.ocMB.1", sizeof(MemBlock));
   tl_assert(mb);
   
   o_stats.memoryBlocksAllocated++;
@@ -2324,7 +2325,7 @@
       /*
       ** No tracked pointer - create one now.
       */
-      tp = VG_(malloc)(sizeof(TrackedPointer));
+      tp = VG_(malloc)("om.oD.1", sizeof(TrackedPointer));
       tl_assert(tp);
       o_stats.trackedPointersAllocated++;
       o_stats.liveTrackedPointers++;
@@ -3160,7 +3161,7 @@
       /*
       ** Create and populate the new node
       */
-      tn = VG_(malloc)(sizeof(TreeNode));
+      tn = VG_(malloc)("om.obMbT.1", sizeof(TreeNode));
       VG_(memset)(tn, 0, sizeof(TreeNode));
       
       tn->start = mb->hdr.key;
@@ -3299,7 +3300,7 @@
 	/*
 	** Create a new block and add it to the circular records list.
 	*/
-	BlockRecord *item = VG_(malloc)(sizeof(BlockRecord));
+	BlockRecord *item = VG_(malloc)("om.orCB.1", sizeof(BlockRecord));
 	tl_assert(item);
 	
 	item->count = 1;
diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c
index 54d76cd..19de00d 100644
--- a/helgrind/hg_main.c
+++ b/helgrind/hg_main.c
@@ -208,10 +208,10 @@
 /*--- Some very basic stuff                                    ---*/
 /*----------------------------------------------------------------*/
 
-static void* hg_zalloc ( SizeT n ) {
+static void* hg_zalloc ( HChar* cc, SizeT n ) {
    void* p;
    tl_assert(n > 0);
-   p = VG_(malloc)( n );
+   p = VG_(malloc)( cc, n );
    tl_assert(p);
    VG_(memset)(p, 0, n);
    return p;
@@ -575,7 +575,7 @@
 
 static Thread* mk_Thread ( SegmentID csegid ) {
    static Int indx      = 1;
-   Thread* thread       = hg_zalloc( sizeof(Thread) );
+   Thread* thread       = hg_zalloc( "hg", sizeof(Thread) );
    thread->locksetA     = HG_(emptyWS)( univ_lsets );
    thread->locksetW     = HG_(emptyWS)( univ_lsets );
    thread->csegid       = csegid;
@@ -590,7 +590,7 @@
 // Make a new lock which is unlocked (hence ownerless)
 static Lock* mk_LockN ( LockKind kind, Addr guestaddr ) {
    static ULong unique = 0;
-   Lock* lock             = hg_zalloc( sizeof(Lock) );
+   Lock* lock             = hg_zalloc( "hg", sizeof(Lock) );
    lock->admin            = admin_locks;
    lock->unique           = unique++;
    lock->magic            = LockN_MAGIC;
@@ -605,7 +605,7 @@
    return lock;
 }
 static Segment* mk_Segment ( Thread* thr, Segment* prev, Segment* other ) {
-   Segment* seg    = hg_zalloc( sizeof(Segment) );
+   Segment* seg    = hg_zalloc( "hg", sizeof(Segment) );
    seg->dfsver     = 0;
    seg->thr        = thr;
    seg->prev       = prev;
@@ -729,7 +729,7 @@
          tl_assert(lk->heldBy == NULL); /* can't w-lock recursively */
          tl_assert(!lk->heldW);
          lk->heldW  = True;
-         lk->heldBy = VG_(newBag)( hg_zalloc, hg_free );
+         lk->heldBy = VG_(newBag)( hg_zalloc, "hg", hg_free );
          VG_(addToBag)( lk->heldBy, (Word)thr );
          break;
       case LK_mbRec:
@@ -783,7 +783,7 @@
       VG_(addToBag)(lk->heldBy, (Word)thr);
    } else {
       lk->heldW  = False;
-      lk->heldBy = VG_(newBag)( hg_zalloc, hg_free );
+      lk->heldBy = VG_(newBag)( hg_zalloc, "hg", hg_free );
       VG_(addToBag)( lk->heldBy, (Word)thr );
    }
    tl_assert(!lk->heldW);
@@ -1380,25 +1380,25 @@
 
    tl_assert(sizeof(Addr) == sizeof(Word));
    tl_assert(map_shmem == NULL);
-   map_shmem = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxed Word cmp*/);
+   map_shmem = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
    tl_assert(map_shmem != NULL);
    shmem__invalidate_scache();
 
    tl_assert(map_threads == NULL);
-   map_threads = hg_zalloc( VG_N_THREADS * sizeof(Thread*) );
+   map_threads = hg_zalloc( "hg", VG_N_THREADS * sizeof(Thread*) );
    tl_assert(map_threads != NULL);
 
    /* re <=: < on 64-bit platforms, == on 32-bit ones */
    tl_assert(sizeof(SegmentID) <= sizeof(Word));
    tl_assert(sizeof(Segment*) == sizeof(Word));
    tl_assert(map_segments == NULL);
-   map_segments = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxed Word cmp*/);
+   map_segments = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
    tl_assert(map_segments != NULL);
    hbefore__invalidate_cache();
 
    tl_assert(sizeof(Addr) == sizeof(Word));
    tl_assert(map_locks == NULL);
-   map_locks = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxed Word cmp*/);
+   map_locks = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxed Word cmp*/);
    tl_assert(map_locks != NULL);
 
    __bus_lock_Lock = mk_LockN( LK_nonRec, (Addr)&__bus_lock );
@@ -1640,7 +1640,7 @@
 }
 
 static XArray* new_VTS ( void ) {
-   return VG_(newXA)( hg_zalloc, hg_free, sizeof(ScalarTS) );
+   return VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(ScalarTS) );
 }
 static XArray* singleton_VTS ( Thread* thr, UWord tym ) {
    ScalarTS st;
@@ -2071,7 +2071,7 @@
    dfsver_current++;
    
    if (dfsver_stack == NULL) {
-     dfsver_stack = VG_(newXA)( hg_zalloc, hg_free, sizeof(Segment*) );
+     dfsver_stack = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Segment*) );
      tl_assert(dfsver_stack);
    }
 
@@ -2740,7 +2740,7 @@
    VG_(doneIterFM)( map_shmem );
 
    // check the cache
-   valid_tags   = hg_zalloc(N_WAY_NENT * sizeof(Addr));
+   valid_tags   = hg_zalloc("hg", N_WAY_NENT * sizeof(Addr));
    n_valid_tags = 0;
    tl_assert(valid_tags);
    for (i = 0; i < N_WAY_NENT; i++) {
@@ -2929,7 +2929,7 @@
                       HG_(cardinalityWS)( univ_lsets, lset_old), lk );
    if (lk->appeared_at) {
       if (ga_to_lastlock == NULL)
-         ga_to_lastlock = VG_(newFM)( hg_zalloc, hg_free, NULL );
+         ga_to_lastlock = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
       VG_(addToFM)( ga_to_lastlock, ga_of_access, (Word)lk->appeared_at );
       stats__ga_LL_adds++;
    }
@@ -3438,7 +3438,7 @@
 
    /* No free F line found.  Expand existing array and try again. */
    new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
-   nyu      = hg_zalloc( new_size * sizeof(CacheLineF) );
+   nyu      = hg_zalloc( "hg", new_size * sizeof(CacheLineF) );
    tl_assert(nyu);
 
    stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
@@ -5435,7 +5435,7 @@
 
 static
 void evh__new_mem_w_perms ( Addr a, SizeT len, 
-                            Bool rr, Bool ww, Bool xx ) {
+                            Bool rr, Bool ww, Bool xx, ULong di_handle ) {
    if (SHOW_EVENTS >= 1)
       VG_(printf)("evh__new_mem_w_perms(%p, %lu, %d,%d,%d)\n",
                   (void*)a, len, (Int)rr, (Int)ww, (Int)xx );
@@ -6021,7 +6021,7 @@
 
 static void map_cond_to_Segment_INIT ( void ) {
    if (UNLIKELY(map_cond_to_Segment == NULL)) {
-      map_cond_to_Segment = VG_(newFM)( hg_zalloc, hg_free, NULL );
+      map_cond_to_Segment = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
       tl_assert(map_cond_to_Segment != NULL);
    }
 }
@@ -6362,7 +6362,7 @@
 
 static void map_sem_to_Segment_stack_INIT ( void ) {
    if (map_sem_to_Segment_stack == NULL) {
-      map_sem_to_Segment_stack = VG_(newFM)( hg_zalloc, hg_free, NULL );
+      map_sem_to_Segment_stack = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
       tl_assert(map_sem_to_Segment_stack != NULL);
    }
 }
@@ -6376,7 +6376,7 @@
       tl_assert(xa);
       VG_(addToXA)( xa, &seg );
    } else {
-      xa = VG_(newXA)( hg_zalloc, hg_free, sizeof(Segment*) );
+      xa = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Segment*) );
       VG_(addToXA)( xa, &seg );
       VG_(addToFM)( map_sem_to_Segment_stack, (Word)sem, (Word)xa );
    }
@@ -6689,7 +6689,7 @@
       presentF = outs_new == links->outs;
       links->outs = outs_new;
    } else {
-      links = hg_zalloc(sizeof(LAOGLinks));
+      links = hg_zalloc("hg", sizeof(LAOGLinks));
       links->inns = HG_(emptyWS)( univ_laog );
       links->outs = HG_(singletonWS)( univ_laog, (Word)dst );
       VG_(addToFM)( laog, (Word)src, (Word)links );
@@ -6705,7 +6705,7 @@
       presentR = inns_new == links->inns;
       links->inns = inns_new;
    } else {
-      links = hg_zalloc(sizeof(LAOGLinks));
+      links = hg_zalloc("hg", sizeof(LAOGLinks));
       links->inns = HG_(singletonWS)( univ_laog, (Word)src );
       links->outs = HG_(emptyWS)( univ_laog );
       VG_(addToFM)( laog, (Word)dst, (Word)links );
@@ -6730,7 +6730,7 @@
       if (VG_(lookupFM)( laog_exposition, NULL, NULL, (Word)&expo )) {
          /* we already have it; do nothing */
       } else {
-         LAOGLinkExposition* expo2 = hg_zalloc(sizeof(LAOGLinkExposition));
+         LAOGLinkExposition* expo2 = hg_zalloc("hg", sizeof(LAOGLinkExposition));
          expo2->src_ga = src->guestaddr;
          expo2->dst_ga = dst->guestaddr;
          expo2->src_ec = src->acquired_at;
@@ -6859,8 +6859,8 @@
       return NULL;
 
    ret     = NULL;
-   stack   = VG_(newXA)( hg_zalloc, hg_free, sizeof(Lock*) );
-   visited = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxedcmp*/ );
+   stack   = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Lock*) );
+   visited = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
 
    (void) VG_(addToXA)( stack, &src );
 
@@ -6913,9 +6913,9 @@
       return;
 
    if (!laog)
-      laog = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxedcmp*/ );
+      laog = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
    if (!laog_exposition)
-      laog_exposition = VG_(newFM)( hg_zalloc, hg_free, 
+      laog_exposition = VG_(newFM)( hg_zalloc, "hg", hg_free, 
                                     cmp_LAOGLinkExposition );
 
    /* First, the check.  Complain if there is any path in laog from lk
@@ -7021,9 +7021,9 @@
    UWord* ws_words;
 
    if (!laog)
-      laog = VG_(newFM)( hg_zalloc, hg_free, NULL/*unboxedcmp*/ );
+      laog = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL/*unboxedcmp*/ );
    if (!laog_exposition)
-      laog_exposition = VG_(newFM)( hg_zalloc, hg_free, 
+      laog_exposition = VG_(newFM)( hg_zalloc, "hg", hg_free, 
                                     cmp_LAOGLinkExposition );
 
    HG_(getPayloadWS)( &ws_words, &ws_size, univ_lsets, locksToDelete );
@@ -7055,7 +7055,7 @@
 
 
 static MallocMeta* new_MallocMeta ( void ) {
-   MallocMeta* md = hg_zalloc( sizeof(MallocMeta) );
+   MallocMeta* md = hg_zalloc( "hg", sizeof(MallocMeta) );
    tl_assert(md);
    return md;
 }
@@ -7499,7 +7499,7 @@
 
 static void map_pthread_t_to_Thread_INIT ( void ) {
    if (UNLIKELY(map_pthread_t_to_Thread == NULL)) {
-      map_pthread_t_to_Thread = VG_(newFM)( hg_zalloc, hg_free, NULL );
+      map_pthread_t_to_Thread = VG_(newFM)( hg_zalloc, "hg", hg_free, NULL );
       tl_assert(map_pthread_t_to_Thread != NULL);
    }
 }
@@ -7736,7 +7736,7 @@
    if (!str)
       str = "(null)";
    if (!string_table) {
-      string_table = VG_(newFM)( hg_zalloc, hg_free, string_table_cmp );
+      string_table = VG_(newFM)( hg_zalloc, "hg", hg_free, string_table_cmp );
       tl_assert(string_table);
    }
    if (VG_(lookupFM)( string_table,
@@ -7745,7 +7745,7 @@
       if (0) VG_(printf)("string_table_strdup: %p -> %p\n", str, copy );
       return copy;
    } else {
-      copy = VG_(strdup)(str);
+      copy = VG_(strdup)("hg", str);
       tl_assert(copy);
       VG_(addToFM)( string_table, (Word)copy, (Word)copy );
       return copy;
@@ -7771,11 +7771,11 @@
    stats__ga_LockN_to_P_queries++;
    tl_assert( is_sane_LockN(lkn) );
    if (!yaWFM) {
-      yaWFM = VG_(newFM)( hg_zalloc, hg_free, lock_unique_cmp );
+      yaWFM = VG_(newFM)( hg_zalloc, "hg", hg_free, lock_unique_cmp );
       tl_assert(yaWFM);
    }
    if (!VG_(lookupFM)( yaWFM, NULL, (Word*)&lkp, (Word)lkn)) {
-      lkp = hg_zalloc( sizeof(Lock) );
+      lkp = hg_zalloc( "hg", sizeof(Lock) );
       *lkp = *lkn;
       lkp->admin = NULL;
       lkp->magic = LockP_MAGIC;
@@ -8113,7 +8113,7 @@
    XArray* xa;
    UWord*  ts_words;
    UWord   ts_size, i;
-   xa = VG_(newXA)( hg_zalloc, hg_free, sizeof(Thread*) );
+   xa = VG_(newXA)( hg_zalloc, "hg", hg_free, sizeof(Thread*) );
    tl_assert(xa);
    HG_(getPayloadWS)( &ts_words, &ts_size, univ_tsets, tset );
    tl_assert(ts_words);
diff --git a/helgrind/hg_wordset.c b/helgrind/hg_wordset.c
index ad6f66d..041af2e 100644
--- a/helgrind/hg_wordset.c
+++ b/helgrind/hg_wordset.c
@@ -140,7 +140,7 @@
    corresponding ix2vec entry number.  The two mappings are mutually
    redundant. */
 struct _WordSetU {
-      void*     (*alloc)(SizeT);
+      void*     (*alloc)(HChar*, SizeT);
       void      (*dealloc)(void*);
       WordFM*   vec2ix; /* WordVec-to-WordSet mapping tree */
       WordVec** ix2vec; /* WordSet-to-WordVec mapping array */
@@ -176,12 +176,12 @@
 {
    WordVec* wv;
    tl_assert(sz >= 0);
-   wv = wsu->alloc( sizeof(WordVec) );
+   wv = wsu->alloc( "hg", sizeof(WordVec) );
    wv->owner = wsu;
    wv->words = NULL;
    wv->size = sz;
    if (sz > 0) {
-     wv->words = wsu->alloc( (SizeT)sz * sizeof(UWord) );
+     wv->words = wsu->alloc( "hg", (SizeT)sz * sizeof(UWord) );
    }
    return wv;
 }
@@ -238,7 +238,7 @@
       return;
    new_sz = 2 * wsu->ix2vec_size;
    if (new_sz == 0) new_sz = 2;
-   new_vec = wsu->alloc( new_sz * sizeof(WordVec*) );
+   new_vec = wsu->alloc( "hg", new_sz * sizeof(WordVec*) );
    tl_assert(new_vec);
    for (i = 0; i < wsu->ix2vec_size; i++)
       new_vec[i] = wsu->ix2vec[i];
@@ -305,18 +305,19 @@
 }
 
 
-WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( SizeT ),
+WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ),
                              void  (*dealloc)(void*),
                              Word  cacheSize )
 {
    WordSetU* wsu;
    WordVec*  empty;
 
-   wsu          = alloc_nofail( sizeof(WordSetU) );
+   wsu          = alloc_nofail( "hg", sizeof(WordSetU) );
    VG_(memset)( wsu, 0, sizeof(WordSetU) );
    wsu->alloc   = alloc_nofail;
    wsu->dealloc = dealloc;
-   wsu->vec2ix  = VG_(newFM)( alloc_nofail, dealloc, cmp_WordVecs_for_FM );
+   wsu->vec2ix  = VG_(newFM)( alloc_nofail, "hg", 
+                              dealloc, cmp_WordVecs_for_FM );
    wsu->ix2vec_used = 0;
    wsu->ix2vec_size = 0;
    wsu->ix2vec      = NULL;
diff --git a/helgrind/hg_wordset.h b/helgrind/hg_wordset.h
index 631d519..871ab78 100644
--- a/helgrind/hg_wordset.h
+++ b/helgrind/hg_wordset.h
@@ -47,7 +47,7 @@
 typedef  UInt              WordSet;   /* opaque, small int index */
 
 /* Allocate and initialise a WordSetU */
-WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( SizeT ),
+WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( HChar*, SizeT ),
                              void  (*dealloc)(void*),
                              Word  cacheSize );
 
diff --git a/include/pub_tool_basics.h b/include/pub_tool_basics.h
index 8e00ec3..3292f00 100644
--- a/include/pub_tool_basics.h
+++ b/include/pub_tool_basics.h
@@ -83,6 +83,9 @@
 #  define NULL ((void*)0)
 #endif
 
+/* This is just too useful to not have around the place somewhere. */
+typedef  struct { UWord uw1; UWord uw2; }  UWordPair;
+
 
 /* ---------------------------------------------------------------------
    non-builtin types
diff --git a/include/pub_tool_debuginfo.h b/include/pub_tool_debuginfo.h
index b43e0a6..30725d5 100644
--- a/include/pub_tool_debuginfo.h
+++ b/include/pub_tool_debuginfo.h
@@ -105,6 +105,48 @@
 */
 extern Char* VG_(describe_IP)(Addr eip, Char* buf, Int n_buf);
 
+
+/* Get an XArray of StackBlock which describe the stack (auto) blocks
+   for this ip.  The caller is expected to free the XArray at some
+   point.  If 'arrays_only' is True, only array-typed blocks are
+   returned; otherwise blocks of all types are returned. */
+
+typedef
+   struct {
+      OffT  base;     /* offset from sp or fp */
+      SizeT szB;      /* size in bytes */
+      Bool  spRel;    /* True => sp-rel, False => fp-rel */
+      Bool  isVec;    /* does block have an array type, or not? */
+      HChar name[16]; /* first 15 chars of name (asciiz) */
+   }
+   StackBlock;
+
+extern void* /* really, XArray* of StackBlock */
+             VG_(di_get_stack_blocks_at_ip)( Addr ip, Bool arrays_only );
+
+
+/* Get an XArray of GlobalBlock which describe the global blocks owned
+   by the shared object characterised by the given di_handle.  Asserts
+   if the handle is invalid.  The caller is responsible for freeing
+   the XArray at some point.  If 'arrays_only' is True, only
+   array-typed blocks are returned; otherwise blocks of all types are
+   returned. */
+
+typedef
+   struct {
+      Addr  addr;
+      SizeT szB;
+      Bool  isVec;      /* does block have an array type, or not? */
+      HChar name[16];   /* first 15 chars of name (asciiz) */
+      HChar soname[16]; /* first 15 chars of soname (asciiz) */
+   }
+   GlobalBlock;
+
+extern void* /* really, XArray* of GlobalBlock */
+VG_(di_get_global_blocks_from_dihandle) ( ULong di_handle,
+                                          Bool  arrays_only );
+
+
 /*====================================================================*/
 /*=== Obtaining segment information                                ===*/
 /*====================================================================*/
diff --git a/include/pub_tool_libcbase.h b/include/pub_tool_libcbase.h
index b2dc388..458d3d5 100644
--- a/include/pub_tool_libcbase.h
+++ b/include/pub_tool_libcbase.h
@@ -144,6 +144,7 @@
 // is NULL, it uses its own seed, which starts at zero.  If pSeed is
 // non-NULL, it uses and updates whatever pSeed points at.
 extern UInt VG_(random) ( /*MOD*/UInt* pSeed );
+#define VG_RAND_MAX (1ULL << 32)
 
 #endif   // __PUB_TOOL_LIBCBASE_H
 
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h
index 6775bc1..ae9669c 100644
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -81,8 +81,12 @@
 extern void VG_(set_SP) ( ThreadId tid, Addr sp );
 extern void VG_(set_IP) ( ThreadId tid, Addr ip );
 
-// For get/set, 'area' is where the asked-for shadow state will be copied
-// into/from.
+// For get/set, 'area' is where the asked-for guest state will be copied
+// into/from.  If shadowNo == 0, the real (non-shadow) guest state is
+// accessed.  If shadowNo == 1, the first shadow area is accessed, and
+// if shadowNo == 2, the second shadow area is accessed.  This gives a
+// completely general way to read/modify a thread's guest register state
+// providing you know the offsets you need.
 void
 VG_(get_shadow_regs_area) ( ThreadId tid, 
                             /*DST*/UChar* dst,
@@ -92,6 +96,14 @@
                             /*DST*/Int shadowNo, OffT offset, SizeT size,
                             /*SRC*/const UChar* src );
 
+// Sets the shadow values for the syscall return value register(s).
+// This is platform specific.
+void VG_(set_syscall_return_shadows) ( ThreadId tid,
+                                       /* shadow vals for the result */
+                                       UWord s1res, UWord s2res,
+                                       /* shadow vals for the error val */
+                                       UWord s1err, UWord s2err );
+
 // Apply a function 'f' to all the general purpose registers in all the
 // current threads.
 // This is very Memcheck-specific -- it's used to find the roots when
diff --git a/include/pub_tool_mallocfree.h b/include/pub_tool_mallocfree.h
index e6f31fa..46482e3 100644
--- a/include/pub_tool_mallocfree.h
+++ b/include/pub_tool_mallocfree.h
@@ -35,11 +35,11 @@
 // These can be for allocating memory used by tools.
 // Nb: the allocators *always succeed* -- they never return NULL (Valgrind
 // will abort if they can't allocate the memory).
-extern void* VG_(malloc)         ( SizeT nbytes );
+extern void* VG_(malloc)         ( HChar* cc, SizeT nbytes );
 extern void  VG_(free)           ( void* p );
-extern void* VG_(calloc)         ( SizeT n, SizeT bytes_per_elem );
-extern void* VG_(realloc)        ( void* p, SizeT size );
-extern Char* VG_(strdup)         ( const Char* s );
+extern void* VG_(calloc)         ( HChar* cc, SizeT n, SizeT bytes_per_elem );
+extern void* VG_(realloc)        ( HChar* cc, void* p, SizeT size );
+extern Char* VG_(strdup)         ( HChar* cc, const Char* s );
 
 // Returns the usable size of a heap-block.  It's the asked-for size plus
 // possibly some more due to rounding up.
diff --git a/include/pub_tool_oset.h b/include/pub_tool_oset.h
index a068d3e..1573136 100644
--- a/include/pub_tool_oset.h
+++ b/include/pub_tool_oset.h
@@ -77,7 +77,7 @@
 // - Free: frees a chunk of memory allocated with Alloc.
 
 typedef Word  (*OSetCmp_t)         ( const void* key, const void* elem );
-typedef void* (*OSetAlloc_t)       ( SizeT szB );
+typedef void* (*OSetAlloc_t)       ( HChar* ec, SizeT szB );
 typedef void  (*OSetFree_t)        ( void* p );
 
 /*--------------------------------------------------------------------*/
@@ -98,7 +98,8 @@
 //   to allow the destruction of any attached resources;  if NULL it is not
 //   called.
 
-extern OSet* VG_(OSetWord_Create)       ( OSetAlloc_t alloc, OSetFree_t free );
+extern OSet* VG_(OSetWord_Create)       ( OSetAlloc_t alloc, HChar* ec, 
+                                          OSetFree_t free );
 extern void  VG_(OSetWord_Destroy)      ( OSet* os );
 
 /*--------------------------------------------------------------------*/
@@ -183,7 +184,8 @@
 //   lead to assertions in Valgrind's allocator.
 
 extern OSet* VG_(OSetGen_Create)    ( OffT keyOff, OSetCmp_t cmp,
-                                      OSetAlloc_t alloc, OSetFree_t free );
+                                      OSetAlloc_t alloc, HChar* ec,
+                                      OSetFree_t free );
 extern void  VG_(OSetGen_Destroy)   ( OSet* os );
 extern void* VG_(OSetGen_AllocNode) ( OSet* os, SizeT elemSize );
 extern void  VG_(OSetGen_FreeNode)  ( OSet* os, void* elem );
diff --git a/include/pub_tool_tooliface.h b/include/pub_tool_tooliface.h
index 04ac74a..210df36 100644
--- a/include/pub_tool_tooliface.h
+++ b/include/pub_tool_tooliface.h
@@ -453,7 +453,7 @@
 /* Part of the core from which this call was made.  Useful for determining
    what kind of error message should be emitted. */
 typedef
-   enum { Vg_CoreStartup, Vg_CoreSignal, Vg_CoreSysCall,
+   enum { Vg_CoreStartup=1, Vg_CoreSignal, Vg_CoreSysCall,
           Vg_CoreTranslate, Vg_CoreClientReq }
    CorePart;
 
@@ -471,15 +471,24 @@
 
    These ones occur at startup, upon some signals, and upon some syscalls.
 
-   For the new_mem_brk and new_mem_stack_signal, the supplied ThreadId
+   For new_mem_brk and new_mem_stack_signal, the supplied ThreadId
    indicates the thread for whom the new memory is being allocated.
+
+   For new_mem_startup and new_mem_mmap, the di_handle argument is a
+   handle which can be used to retrieve debug info associated with the
+   mapping or allocation (because it is of a file that Valgrind has
+   decided to read debug info from).  If the value is zero, there is
+   no associated debug info.  If the value exceeds zero, it can be
+   supplied as an argument to selected queries in m_debuginfo.
 */
 void VG_(track_new_mem_startup)     (void(*f)(Addr a, SizeT len,
-                                              Bool rr, Bool ww, Bool xx));
+                                              Bool rr, Bool ww, Bool xx,
+                                              ULong di_handle));
 void VG_(track_new_mem_stack_signal)(void(*f)(Addr a, SizeT len, ThreadId tid));
 void VG_(track_new_mem_brk)         (void(*f)(Addr a, SizeT len, ThreadId tid));
 void VG_(track_new_mem_mmap)        (void(*f)(Addr a, SizeT len,
-                                              Bool rr, Bool ww, Bool xx));
+                                              Bool rr, Bool ww, Bool xx,
+                                              ULong di_handle));
 
 void VG_(track_copy_mem_remap)      (void(*f)(Addr from, Addr to, SizeT len));
 void VG_(track_change_mem_mprotect) (void(*f)(Addr a, SizeT len,
diff --git a/include/pub_tool_vkiscnums.h b/include/pub_tool_vkiscnums.h
index efe1cdc..4bf7cfe 100644
--- a/include/pub_tool_vkiscnums.h
+++ b/include/pub_tool_vkiscnums.h
@@ -54,14 +54,29 @@
 
 #if defined(VGP_x86_linux)
 #  include "vki/vki-scnums-x86-linux.h"
+
 #elif defined(VGP_amd64_linux)
 #  include "vki/vki-scnums-amd64-linux.h"
+
 #elif defined(VGP_ppc32_linux)
 #  include "vki/vki-scnums-ppc32-linux.h"
+
 #elif defined(VGP_ppc64_linux)
 #  include "vki/vki-scnums-ppc64-linux.h"
+
 #elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
 #  include "vki/vki-scnums-aix5.h"
+
+/* Make it possible to include this file in assembly sources. */
+#if !defined(VG_IN_ASSEMBLY_SOURCE)
+
+/* Look up the name of a syscall, using the bindings previously
+   created by VG_(aix5_register_syscall), for the purposes of making
+   error messages. */
+extern UChar* VG_(aix5_sysno_to_sysname)( Int sysno );
+
+#endif /* !defined(VG_IN_ASSEMBLY_SOURCE) */
+
 #else
 #  error Unknown platform
 #endif
diff --git a/include/pub_tool_wordfm.h b/include/pub_tool_wordfm.h
index 8bbee73..ef9abfb 100644
--- a/include/pub_tool_wordfm.h
+++ b/include/pub_tool_wordfm.h
@@ -76,7 +76,8 @@
    sections of the map, or the whole thing.  If kCmp is NULL then the
    ordering used is unsigned word ordering (UWord) on the key
    values. */
-WordFM* VG_(newFM) ( void* (*alloc_nofail)( SizeT ),
+WordFM* VG_(newFM) ( void* (*alloc_nofail)( HChar* cc, SizeT ),
+                     HChar* cc,
                      void  (*dealloc)(void*),
                      Word  (*kCmp)(UWord,UWord) );
 
@@ -86,8 +87,9 @@
 
 /* Add (k,v) to fm.  If a binding for k already exists, it is updated
    to map to this new v.  In that case we should really return the
-   previous v so that caller can finalise it.  Oh well. */
-void VG_(addToFM) ( WordFM* fm, UWord k, UWord v );
+   previous v so that caller can finalise it.  Oh well.  Returns
+   True if a binding for k already exists. */
+Bool VG_(addToFM) ( WordFM* fm, UWord k, UWord v );
 
 // Delete key from fm, returning associated key and val if found
 Bool VG_(delFromFM) ( WordFM* fm,
@@ -97,6 +99,19 @@
 Bool VG_(lookupFM) ( WordFM* fm, 
                      /*OUT*/UWord* keyP, /*OUT*/UWord* valP, UWord key );
 
+// Find the closest key values bracketing the given key, assuming the 
+// given key is not present in the map.  minKey and maxKey are the 
+// minimum and maximum possible key values.  The resulting bracket
+// values are returned in *kMinP and *kMaxP.  It follows that if fm is
+// empty then the returned values are simply minKey and maxKey.
+//
+// If the operation was successful (that is, the given key is not
+// present), True is returned.  If the given key is in fact present,
+// False is returned, and *kMinP and *kMaxP are undefined.
+Bool VG_(findBoundsFM)( WordFM* fm,
+                        /*OUT*/UWord* kMinP, /*OUT*/UWord* kMaxP,
+                        UWord minKey, UWord maxKey, UWord key );
+
 // How many elements are there in fm?
 UWord VG_(sizeFM) ( WordFM* fm );
 
@@ -139,7 +154,8 @@
 typedef  struct _WordBag  WordBag; /* opaque */
 
 /* Allocate and initialise a WordBag */
-WordBag* VG_(newBag) ( void* (*alloc_nofail)( SizeT ),
+WordBag* VG_(newBag) ( void* (*alloc_nofail)( HChar* cc, SizeT ),
+                       HChar* cc,
                        void  (*dealloc)(void*) );
 
 /* Free up the Bag. */
diff --git a/include/pub_tool_xarray.h b/include/pub_tool_xarray.h
index 7cb0ac6..ca86a52 100644
--- a/include/pub_tool_xarray.h
+++ b/include/pub_tool_xarray.h
@@ -44,12 +44,13 @@
 
 
 /* It's an abstract type.  Bwaha. */
-typedef  void  XArray;
+typedef  struct _XArray  XArray;
 
 /* Create new XArray, using given allocation and free function, and
    for elements of the specified size.  Alloc fn must not fail (that
    is, if it returns it must have succeeded.) */
-extern XArray* VG_(newXA) ( void*(*alloc_fn)(SizeT), 
+extern XArray* VG_(newXA) ( void*(*alloc_fn)(HChar*,SizeT), 
+                            HChar* cc,
                             void(*free_fn)(void*),
                             Word elemSzB );
 
@@ -102,8 +103,10 @@
 /* Make a new, completely independent copy of the given XArray, using
    the existing allocation function to allocate the new space.
    Returns NULL if the allocation function didn't manage to allocate
-   space (but did return NULL rather than merely abort.) */
-extern XArray* VG_(cloneXA)( XArray* xa );
+   space (but did return NULL rather than merely abort.)  Space for
+   the clone (and all additions to it) is billed to 'cc' unless that
+   is NULL, in which case the parent's cost-center is used. */
+extern XArray* VG_(cloneXA)( HChar* cc, XArray* xa );
 
 #endif   // __PUB_TOOL_XARRAY_H
 
diff --git a/massif/ms_main.c b/massif/ms_main.c
index 1631fe2..a1fca58 100644
--- a/massif/ms_main.c
+++ b/massif/ms_main.c
@@ -292,7 +292,8 @@
 static void init_alloc_fns(void)
 {
    // Create the list, and add the default elements.
-   alloc_fns = VG_(newXA)(VG_(malloc), VG_(free), sizeof(Char*));
+   alloc_fns = VG_(newXA)(VG_(malloc), "ms.main.iaf.1",
+                                       VG_(free), sizeof(Char*));
    #define DO(x)  { Char* s = x; VG_(addToXA)(alloc_fns, &s); }
 
    // Ordered according to (presumed) frequency.
@@ -583,11 +584,13 @@
    if (parent->n_children == parent->max_children) {
       if (parent->max_children == 0) {
          parent->max_children = 4;
-         parent->children = VG_(malloc)( parent->max_children * sizeof(XPt*) );
+         parent->children = VG_(malloc)( "ms.main.acx.1",
+                                         parent->max_children * sizeof(XPt*) );
          n_xpt_init_expansions++;
       } else {
          parent->max_children *= 2;    // Double size
-         parent->children = VG_(realloc)( parent->children,
+         parent->children = VG_(realloc)( "ms.main.acx.2",
+                                          parent->children,
                                           parent->max_children * sizeof(XPt*) );
          n_xpt_later_expansions++;
       }
@@ -650,7 +653,7 @@
    n_child_sxpts = n_sig_children + ( n_insig_children > 0 ? 1 : 0 );
 
    // Duplicate the XPt.
-   sxpt                 = VG_(malloc)(sizeof(SXPt));
+   sxpt                 = VG_(malloc)("ms.main.dX.1", sizeof(SXPt));
    n_sxpt_allocs++;
    sxpt->tag            = SigSXPt;
    sxpt->szB            = xpt->szB;
@@ -661,7 +664,8 @@
    if (n_child_sxpts > 0) {
       Int j;
       SizeT sig_children_szB = 0, insig_children_szB = 0;
-      sxpt->Sig.children = VG_(malloc)(n_child_sxpts * sizeof(SXPt*));
+      sxpt->Sig.children = VG_(malloc)("ms.main.dX.2", 
+                                       n_child_sxpts * sizeof(SXPt*));
 
       // Duplicate the significant children.  (Nb: sig_children_szB +
       // insig_children_szB doesn't necessarily equal xpt->szB.)
@@ -680,7 +684,7 @@
       if (n_insig_children > 0) {
          // Nb: We 'n_sxpt_allocs' here because creating an Insig SXPt
          // doesn't involve a call to dup_XTree().
-         SXPt* insig_sxpt = VG_(malloc)(sizeof(SXPt));
+         SXPt* insig_sxpt = VG_(malloc)("ms.main.dX.3", sizeof(SXPt));
          n_sxpt_allocs++;
          insig_sxpt->tag = InsigSXPt;
          insig_sxpt->szB = insig_children_szB;
@@ -1478,7 +1482,7 @@
    }
 
    // Make new HP_Chunk node, add to malloc_list
-   hc           = VG_(malloc)(sizeof(HP_Chunk));
+   hc           = VG_(malloc)("ms.main.nb.1", sizeof(HP_Chunk));
    hc->req_szB  = req_szB;
    hc->slop_szB = slop_szB;
    hc->data     = (Addr)p;
@@ -2016,7 +2020,8 @@
    if (is_detailed_snapshot(snapshot)) {
       // Detailed snapshot -- print heap tree.
       Int   depth_str_len = clo_depth + 3;
-      Char* depth_str = VG_(malloc)(sizeof(Char) * depth_str_len);
+      Char* depth_str = VG_(malloc)("ms.main.pps.1", 
+                                    sizeof(Char) * depth_str_len);
       SizeT snapshot_total_szB =
          snapshot->heap_szB + snapshot->heap_extra_szB + snapshot->stacks_szB;
       depth_str[0] = '\0';   // Initialise depth_str to "".
@@ -2184,7 +2189,8 @@
    }
 
    // Initialise snapshot array, and sanity-check it.
-   snapshots = VG_(malloc)(sizeof(Snapshot) * clo_max_snapshots);
+   snapshots = VG_(malloc)("ms.main.mpoci.1", 
+                           sizeof(Snapshot) * clo_max_snapshots);
    // We don't want to do snapshot sanity checks here, because they're
    // currently uninitialised.
    for (i = 0; i < clo_max_snapshots; i++) {
@@ -2236,7 +2242,8 @@
    init_alloc_fns();
 
    // Initialise args_for_massif.
-   args_for_massif = VG_(newXA)(VG_(malloc), VG_(free), sizeof(HChar*));
+   args_for_massif = VG_(newXA)(VG_(malloc), "ms.main.mprci.1", 
+                                VG_(free), sizeof(HChar*));
 }
 
 VG_DETERMINE_INTERFACE_VERSION(ms_pre_clo_init)
diff --git a/memcheck/mc_errors.c b/memcheck/mc_errors.c
index 52e881e..e95ad5e 100644
--- a/memcheck/mc_errors.c
+++ b/memcheck/mc_errors.c
@@ -1240,7 +1240,7 @@
    if (VG_(get_supp_kind)(su) == ParamSupp) {
       eof = VG_(get_line) ( fd, buf, nBuf );
       if (eof) return False;
-      VG_(set_supp_string)(su, VG_(strdup)(buf));
+      VG_(set_supp_string)(su, VG_(strdup)("mc.resi.1", buf));
    }
    return True;
 }
diff --git a/memcheck/mc_leakcheck.c b/memcheck/mc_leakcheck.c
index d9dc42f..1c2b678 100644
--- a/memcheck/mc_leakcheck.c
+++ b/memcheck/mc_leakcheck.c
@@ -85,7 +85,7 @@
 
    n_starts = 1;
    while (True) {
-      starts = VG_(malloc)( n_starts * sizeof(Addr) );
+      starts = VG_(malloc)( "mc.gss.1", n_starts * sizeof(Addr) );
       if (starts == NULL)
          break;
       r = VG_(am_get_segment_starts)( starts, n_starts );
@@ -469,7 +469,7 @@
 	 p->indirect_bytes += lc_markstack[i].indirect;
       } else {
          n_lossrecords ++;
-         p = VG_(malloc)(sizeof(LossRecord));
+         p = VG_(malloc)( "mc.fr.1", sizeof(LossRecord));
          p->loss_mode    = lc_markstack[i].state;
          p->allocated_at = where;
          p->total_bytes  = lc_shadows[i]->szB;
@@ -608,7 +608,8 @@
    VG_(ssort)((void*)mallocs, n_mallocs, 
               sizeof(VgHashNode*), lc_compar);
 
-   malloc_chunk_holds_a_pool_chunk = VG_(calloc)( n_mallocs, sizeof(Bool) );
+   malloc_chunk_holds_a_pool_chunk = VG_(calloc)( "mc.fas.1",
+                                                  n_mallocs, sizeof(Bool) );
 
    *n_shadows = n_mallocs;
 
@@ -620,7 +621,8 @@
          /* We'll need a shadow for this chunk. */
          ++(*n_shadows);
 
-         /* Possibly invalidate the malloc holding the beginning of this chunk. */
+         /* Possibly invalidate the malloc holding the beginning of
+            this chunk. */
          m = find_shadow_for(mc->data, mallocs, n_mallocs);
          if (m != -1 && malloc_chunk_holds_a_pool_chunk[m] == False) {
             tl_assert(*n_shadows > 0);
@@ -641,7 +643,7 @@
    }
 
    tl_assert(*n_shadows > 0);
-   shadows = VG_(malloc)(sizeof(VgHashNode*) * (*n_shadows));
+   shadows = VG_(malloc)("mc.fas.2", sizeof(VgHashNode*) * (*n_shadows));
    s = 0;
 
    /* Copy the mempool chunks into the final array. */
@@ -738,7 +740,8 @@
    lc_max_mallocd_addr = lc_shadows[lc_n_shadows-1]->data
                          + lc_shadows[lc_n_shadows-1]->szB;
 
-   lc_markstack = VG_(malloc)( lc_n_shadows * sizeof(*lc_markstack) );
+   lc_markstack = VG_(malloc)( "mc.ddml.1",
+                               lc_n_shadows * sizeof(*lc_markstack) );
    for (i = 0; i < lc_n_shadows; i++) {
       lc_markstack[i].next = -1;
       lc_markstack[i].state = Unreached;
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 7f581e8..60eea31 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -399,7 +399,7 @@
    tl_assert(sizeof(Addr) == sizeof(void*));
    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                     /*fastCmp*/ NULL,
-                                    VG_(malloc), VG_(free) );
+                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
 }
 
 /* Check representation invariants; if OK return NULL; else a
@@ -891,7 +891,8 @@
 {
    return VG_(OSetGen_Create)( offsetof(SecVBitNode, a), 
                                NULL, // use fast comparisons
-                               VG_(malloc), VG_(free) );
+                               VG_(malloc), "mc.cSVT.1 (sec VBit table)", 
+                               VG_(free) );
 }
 
 static void gcSecVBitTable(void)
@@ -2151,8 +2152,8 @@
 
 static OSet* ocacheL2 = NULL;
 
-static void* ocacheL2_malloc ( SizeT szB ) {
-   return VG_(malloc)(szB);
+static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
+   return VG_(malloc)(cc, szB);
 }
 static void ocacheL2_free ( void* v ) {
    VG_(free)( v );
@@ -2169,7 +2170,7 @@
    ocacheL2 
       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag), 
                              NULL, /* fast cmp */
-                             ocacheL2_malloc, ocacheL2_free );
+                             ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
    tl_assert(ocacheL2);
    stats__ocacheL2_n_nodes = 0;
 }
@@ -3654,7 +3655,8 @@
 }
 
 static
-void mc_new_mem_startup( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
+void mc_new_mem_startup( Addr a, SizeT len,
+                         Bool rr, Bool ww, Bool xx, ULong di_handle )
 {
    /* Ignore the permissions, just make it defined.  Seems to work... */
    // Because code is defined, initialised variables get put in the data
@@ -3673,7 +3675,8 @@
 }
 
 static
-void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
+void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
+                       ULong di_handle )
 {
    MC_(make_mem_defined)(a, len);
 }
@@ -4801,7 +4804,7 @@
    tl_assert(cgb_used == cgb_size);
    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
 
-   cgbs_new = VG_(malloc)( sz_new * sizeof(CGenBlock) );
+   cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
    for (i = 0; i < cgb_used; i++) 
       cgbs_new[i] = cgbs[i];
 
@@ -4901,9 +4904,8 @@
             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
             cgbs[i].start = arg[1];
             cgbs[i].size  = arg[2];
-            cgbs[i].desc  = VG_(strdup)((Char *)arg[3]);
+            cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
-
             *ret = i;
          } else
             *ret = -1;
diff --git a/memcheck/mc_malloc_wrappers.c b/memcheck/mc_malloc_wrappers.c
index 07cb484..e2a9eae 100644
--- a/memcheck/mc_malloc_wrappers.c
+++ b/memcheck/mc_malloc_wrappers.c
@@ -135,7 +135,7 @@
 MC_Chunk* create_MC_Chunk ( ExeContext* ec, Addr p, SizeT szB,
                             MC_AllocKind kind)
 {
-   MC_Chunk* mc  = VG_(malloc)(sizeof(MC_Chunk));
+   MC_Chunk* mc  = VG_(malloc)("mc.cMC.1 (a MC_Chunk)", sizeof(MC_Chunk));
    mc->data      = p;
    mc->szB       = szB;
    mc->allockind = kind;
@@ -501,7 +501,7 @@
      VG_(tool_panic)("MC_(create_mempool): duplicate pool creation");
    }
    
-   mp = VG_(malloc)(sizeof(MC_Mempool));
+   mp = VG_(malloc)("mc.cm.1", sizeof(MC_Mempool));
    mp->pool       = pool;
    mp->rzB        = rzB;
    mp->is_zeroed  = is_zeroed;
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index d848d83..18ee11a 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -3787,7 +3787,8 @@
    IRExpr*   guard;
    IRCallee* cee;
    Bool      alreadyPresent;
-   XArray*   pairs = VG_(newXA)( VG_(malloc), VG_(free), sizeof(Pair) );
+   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
+                                 VG_(free), sizeof(Pair) );
    /* Scan forwards through the statements.  Each time a call to one
       of the relevant helpers is seen, check if we have made a
       previous call to the same helper using the same guard
diff --git a/memcheck/tests/oset_test.c b/memcheck/tests/oset_test.c
index 3954f73..10033a2 100644
--- a/memcheck/tests/oset_test.c
+++ b/memcheck/tests/oset_test.c
@@ -45,7 +45,7 @@
   return seed;
 }
 
-static void* allocate_node(SizeT szB)
+static void* allocate_node(HChar* cc, SizeT szB)
 { return malloc(szB); }
 
 static void free_node(void* p)
@@ -84,7 +84,7 @@
    // comparisons.
    OSet* oset = VG_(OSetGen_Create)(0,
                                     NULL,
-                                    allocate_node, free_node);
+                                    allocate_node, "oset_test.1", free_node);
 
    // Try some operations on an empty OSet to ensure they don't screw up.
    vg_assert( ! VG_(OSetGen_Contains)(oset, &v) );
@@ -217,7 +217,7 @@
 
    // Create a static OSet of Ints.  This one uses fast (built-in)
    // comparisons.
-   OSet* oset = VG_(OSetWord_Create)(allocate_node, free_node);
+   OSet* oset = VG_(OSetWord_Create)(allocate_node, "oset_test.2", free_node);
 
    // Try some operations on an empty OSet to ensure they don't screw up.
    vg_assert( ! VG_(OSetWord_Contains)(oset, v) );
@@ -375,7 +375,7 @@
    // comparisons.
    OSet* oset = VG_(OSetGen_Create)(offsetof(Block, first),
                                     blockCmp,
-                                    allocate_node, free_node);
+                                    allocate_node, "oset_test.3", free_node);
 
    // Try some operations on an empty OSet to ensure they don't screw up.
    vg_assert( ! VG_(OSetGen_Contains)(oset, &v) );
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
index bd2924d..a27e947 100644
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -22,6 +22,8 @@
 	filter_cmdline0 filter_linenos \
 	filter_fdleak filter_none_discards filter_stderr
 
+noinst_HEADERS = fdleak.h
+
 EXTRA_DIST = $(noinst_SCRIPTS) \
 	ansi.stderr.exp ansi.vgtest \
 	args.stderr.exp args.stdout.exp args.vgtest \
diff --git a/none/tests/ppc32/Makefile.am b/none/tests/ppc32/Makefile.am
index c593d58..f168bb7 100644
--- a/none/tests/ppc32/Makefile.am
+++ b/none/tests/ppc32/Makefile.am
@@ -31,6 +31,6 @@
 		@FLAG_M32@
 AM_CXXFLAGS = $(AM_CFLAGS) @FLAG_M32@
 
-jm_insns_CFLAGS = -Winline -Wall -O -mregnames -maltivec @FLAG_M32@
+jm_insns_CFLAGS = -Winline -Wall -O -g -mregnames -maltivec @FLAG_M32@
 testVMX_CFLAGS  = -O -g -Wall -maltivec -mabi=altivec -DALTIVEC \
 			-DGCC_COMPILER @FLAG_M32@
diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am
index 2bd2d96..c12475e 100644
--- a/none/tests/ppc64/Makefile.am
+++ b/none/tests/ppc64/Makefile.am
@@ -18,4 +18,4 @@
 		@FLAG_M64@
 AM_CXXFLAGS = $(AM_CFLAGS) @FLAG_M64@
 
-jm_insns_CFLAGS = -Winline -Wall -O -mregnames -maltivec @FLAG_M64@
+jm_insns_CFLAGS = -Winline -Wall -O -g -mregnames -maltivec @FLAG_M64@