vg_symtab2.c:
    - Can now handle file sizes > 65536 lines, despite the stabs format only
      storing line numbers in a short.  Do this heuristically, by looking for
      line number sequences that go from 65000-odd to 0-odd within the same
      file.

      This required changing the RiLoc.lineno field to 20 bits, which gives a
      maximum file length of 1,000,000-odd lines, which seems reasonable.

      In order to keep RiLoc at 12 bytes (important because there are lots of
      them) this required stealing four bits from the RiLoc.size field,
      reducing it to 12 bits.  This isn't too bad because the size is unlikely
      to be larger than 4096 bytes -- we were already ignoring any ones larger
      than 10,000 bytes because they were suspicious anyway (and see next
      point).

    - Tightened up the sanity checking on line address ranges.  Previously any
      range that looked suspicious (eg. > 10000 bytes, or not within the bound
      of the segment info) was simply ignored(!)  Now it prints a warning when
      this happens and truncates the size to 1 to be safe;  also there are some
      extra assertions for totally space-cadet numbers.

      (At first these checks were all assertions, but I tried a version of GNU
      gas that produces a small handful of dodgy stabs entries;  warnings
      seemed a reasonable compromise.)

vg_cachesim.c:
    - Removed the requirement that both types of cost centre (iCC, idCC) have
      instr_addr as their second word.  Less fragile -- now the only
      requirement is that they both have their type tag as their first byte.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@197 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index de4f6ef..bfa9171 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -47,7 +47,8 @@
 #define FN_NAME_LEN                       256
 #define BUF_LEN                           512
 #define COMMIFY_BUF_LEN                   128
-#define RESULTS_BUF                       128
+#define RESULTS_BUF_LEN                   128
+#define LINE_BUF_LEN                       64
 
 /*------------------------------------------------------------*/
 /*--- Output file related stuff                            ---*/
@@ -87,15 +88,14 @@
  * vg_include.c 
  *
  * WARNING:  the 'tag' field *must* be the first byte of both CC types.
- *           the 'instr_addr' *must* be the second word of both CC types.
  *
- * This is because we use them when we don't know what type of CC we're dealing
- * with.
+ * This is because we use it to work out what kind of CC we're dealing with.
  */ 
 struct _iCC {
    /* word 1 */
    UChar tag;
    UChar instr_size;
+   /* 2 bytes padding */
 
    /* words 2+ */
    Addr instr_addr;
@@ -107,6 +107,7 @@
    UChar tag;
    UChar instr_size;
    UChar data_size;
+   /* 1 byte padding */
 
    /* words 2+ */
    Addr instr_addr;
@@ -133,25 +134,24 @@
    initCC(&cc->D);
 }
 
-static __inline__ void sprint_iCC(Char buf[BUF_LEN], UInt ln, iCC* cc)
+static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2);
 }
 
-static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], UInt ln, 
-                                             idCC* cc)
+static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2, 
-                          cc->D.a, cc->D.m1, cc->D.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2, 
+                      cc->D.a, cc->D.m1, cc->D.m2);
 }
 
-static __inline__ void sprint_write_CC(Char buf[BUF_LEN], UInt ln, idCC* cc)
+static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu . . . %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2, 
-                          cc->D.a, cc->D.m1, cc->D.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2, 
+                      cc->D.a, cc->D.m1, cc->D.m2);
 }
 
 /*------------------------------------------------------------*/
@@ -211,14 +211,13 @@
       BBCC_table[i] = NULL;
 }
 
-static void get_file_fn_names(Addr instr_addr, Char filename[FILENAME_LEN],
-                       Char fn_name[FN_NAME_LEN])
+static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
+                           Char fn_name[FN_NAME_LEN], Int* line_num)
 {
-   UInt dummy_line_num;
    Bool found1, found2, no_demangle = False;
 
    found1 = VG_(what_line_is_this)(instr_addr, filename,
-                                   FILENAME_LEN, &dummy_line_num);
+                                   FILENAME_LEN, line_num);
    found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
 
    if (!found1 && !found2) {
@@ -304,8 +303,9 @@
    BBCC      *curr_BBCC;
    Char       filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
    UInt       filename_hash, fnname_hash, BBCC_hash;
+   Int        dummy_line_num;
 
-   get_file_fn_names(bb_orig_addr, filename, fn_name);
+   get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
 
    VGP_PUSHCC(VgpCacheGetBBCC);
    filename_hash = hash(filename, N_FILE_ENTRIES);
@@ -684,7 +684,7 @@
       }
    }
    VG_(close)(fd);
-    
+
    initCC(&Ir_total);
    initCC(&Dr_total);
    initCC(&Dw_total);
@@ -723,30 +723,17 @@
 /*--- Printing of output file and summary stats            ---*/
 /*------------------------------------------------------------*/
 
-int get_line_num(Addr instr_addr) 
-{
-   Char filename[FILENAME_LEN] = "???";
-   UInt line_num;
-   Bool found;
-
-   found = VG_(what_line_is_this)(instr_addr, filename,
-                                  FILENAME_LEN, &line_num);
-   if (!found) {
-      line_num = 0; 
-   }
-   return line_num;
-}
-
 static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl, 
                                                  Char *first_instr_fn)
 {
    Addr BBCC_ptr0, BBCC_ptr;
-   Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4];
+   Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
    UInt line_num;
 
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
-   VG_(write)(fd, (void*)"\n", 1);
+   /* Mark start of basic block in output, just to ease debugging */
+   VG_(write)(fd, (void*)"\n", 1);  
 
    VG_(strcpy)(curr_file, first_instr_fl);
    
@@ -759,18 +746,7 @@
       Char fl_buf[FILENAME_LEN];
       Char fn_buf[FN_NAME_LEN];
 
-      /* Assumes instr_addr position is same for both CCs. */
-      Addr instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
-      get_file_fn_names(instr_addr, fl_buf, fn_buf);
-
-      /* Allow for filename switching in the middle of a BB;  if this happens,
-       * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
-         VG_(strcpy)(curr_file, fl_buf);
-         VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
-         VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
-      }
-
+      Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
 #define ADD_CC_TO(CC_type, cc, total)           \
@@ -779,24 +755,24 @@
    total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
           
          case INSTR_CC:
-            line_num = get_line_num(((iCC*)BBCC_ptr)->instr_addr);
-            sprint_iCC(buf, line_num, (iCC*)BBCC_ptr);
+            instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
+            sprint_iCC(buf, (iCC*)BBCC_ptr);
             ADD_CC_TO(iCC, I, Ir_total);
             BBCC_ptr += sizeof(iCC);
             break;
 
          case READ_CC:
          case  MOD_CC:
-            line_num = get_line_num(((idCC*)BBCC_ptr)->instr_addr);
-            sprint_read_or_mod_CC(buf, line_num, (idCC*)BBCC_ptr);
+            instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
+            sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
             ADD_CC_TO(idCC, D, Dr_total);
             BBCC_ptr += sizeof(idCC);
             break;
 
          case WRITE_CC:
-            line_num = get_line_num(((idCC*)BBCC_ptr)->instr_addr);
-            sprint_write_CC(buf, line_num, (idCC*)BBCC_ptr);
+            instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
+            sprint_write_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
             ADD_CC_TO(idCC, D, Dw_total);
             BBCC_ptr += sizeof(idCC);
@@ -810,19 +786,31 @@
       }
       distinct_instrs++;
       
+      get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
+
+      /* Allow for filename switching in the middle of a BB;  if this happens,
+       * must print the new filename with the function name. */
+      if (0 != strcmp(fl_buf, curr_file)) {
+         VG_(strcpy)(curr_file, fl_buf);
+         VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
+         VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
+      }
+
       /* If the function name for this instruction doesn't match that of the
-       * first instruction in the BB, print out a warning. */
+       * first instruction in the BB, print warning. */
       if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
-         VG_(printf)("  filenames: BB:%s, instr:%s;  "
-                     "fn_names:  BB:%s, instr:%s;  "
-                     "line: %d\n", 
+         VG_(printf)("  filenames: BB:%s, instr:%s;"
+                     "  fn_names:  BB:%s, instr:%s;"
+                     "  line: %d\n", 
                      first_instr_fl, fl_buf, 
                      first_instr_fn, fn_buf, 
                      line_num);
       }
 
-      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(lbuf, "%u ", line_num);
+      VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf));   /* line number */
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));     /* cost centre */
    }
    /* If we switched filenames in the middle of the BB without switching back,
     * switch back now because the subsequent BB may be relying on falling under
@@ -831,7 +819,9 @@
       VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
       VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
    }
-   //VG_(write)(fd, (void*)"#}\n", 3);
+
+   /* Mark end of basic block */
+   /* VG_(write)(fd, (void*)"#}\n", 3); */
 
    vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
 }
@@ -961,9 +951,9 @@
    CC D_total;
    ULong L2_total_m, L2_total_mr, L2_total_mw,
          L2_total, L2_total_r, L2_total_w;
-   char buf1[RESULTS_BUF], 
-        buf2[RESULTS_BUF], 
-        buf3[RESULTS_BUF];
+   char buf1[RESULTS_BUF_LEN], 
+        buf2[RESULTS_BUF_LEN], 
+        buf3[RESULTS_BUF_LEN];
    Int l1, l2, l3;
    Int p;
 
diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c
index 25d0ce6..c269b77 100644
--- a/coregrind/vg_symtab2.c
+++ b/coregrind/vg_symtab2.c
@@ -77,15 +77,34 @@
    }
    RiSym;
 
+/* Line count at which overflow happens, due to line numbers being stored as
+ * shorts in `struct nlist' in a.out.h. */
+#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
 
-/* A structure to hold addr-to-source info for a single line. */
+#define LINENO_BITS     20
+#define LOC_SIZE_BITS  (32 - LINENO_BITS)
+#define MAX_LINENO     (1 << LINENO_BITS)
+
+/* Unlikely to have any lines with instruction ranges > 4096 bytes */
+#define MAX_LOC_SIZE   (1 << LOC_SIZE_BITS)
+
+/* Number used to detect line number overflows;  if one line is 60000-odd
+ * smaller than the previous, it was probably an overflow.
+ */
+#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
+
+/* A structure to hold addr-to-source info for a single line.  There can be a
+ * lot of these, hence the dense packing. */
 typedef
    struct {
-      Addr   addr;   /* lowest address for this line */
-      Int    fnmoff; /* source filename; offset in this SegInfo's str tab */
-      UShort lineno; /* source line number, or zero */
-      UShort size;   /* size in bytes; we go to a bit of trouble to
-                        catch overflows of this */
+      /* Word 1 */
+      Addr   addr;                  /* lowest address for this line */
+      /* Word 2 */
+      UShort size:LOC_SIZE_BITS;    /* byte size; we catch overflows of this */
+      UInt   lineno:LINENO_BITS;    /* source line number, or zero */
+      /* Word 3 */
+      UInt   fnmoff;                /* source filename; offset in this 
+                                       SegInfo's str tab */
    }
    RiLoc;
 
@@ -215,8 +234,8 @@
    Int    new_sz, i;
    RiLoc* new_tab;
 
-   /* Ignore zero-sized locs. */
-   if (loc->size == 0) return;
+   /* Zero-sized locs should have been ignored earlier */
+   vg_assert(loc->size > 0);
 
    if (si->loctab_used == si->loctab_size) {
       new_sz = 2 * si->loctab_size;
@@ -547,20 +566,41 @@
 static __inline__
 void addLineInfo ( SegInfo* si,
                    Int      fnmoff,
-                   Addr     start,
-                   Addr     end,
-                   UInt     lineno )
+                   Addr     this,
+                   Addr     next,
+                   Int      lineno,
+                   Int      entry )
 {
    RiLoc loc;
-   UInt size = end - start + 1;
+   Int size = next - this;
 
-   /* Sanity ... */
-   if (size > 10000) return;
+   /* Ignore zero-sized locs */
+   if (this == next) return;
 
-   if (start >= si->start+si->size 
-       || end < si->start) return;
+   /* Maximum sanity checking.  Some versions of GNU as do a shabby job with
+    * stabs entries;  if anything looks suspicious, revert to a size of 1.
+    * This should catch the instruction of interest (since if using asm-level
+    * debug info, one instruction will correspond to one line, unlike with
+    * C-level debug info where multiple instructions can map to the one line), 
+    * but avoid catching any other instructions bogusly. */
+   if (this > next) {
+       VG_(message)(Vg_DebugMsg, 
+                    "warning: stabs addresses out of order "
+                    "at entry %d: 0x%x 0x%x", entry, this, next);
+       size = 1;
+   }
 
-   loc.addr      = start;
+   if (size > MAX_LOC_SIZE) {
+       VG_(message)(Vg_DebugMsg, 
+                    "warning: stabs line address range too large "
+                    "at entry %d: %d", entry, size);
+       size = 1;
+   }
+
+   vg_assert(this < si->start + si->size && next-1 > si->start);
+   vg_assert(lineno >= 0 && lineno < MAX_LINENO);
+
+   loc.addr      = this;
    loc.size      = (UShort)size;
    loc.lineno    = lineno;
    loc.fnmoff    = fnmoff;
@@ -590,13 +630,15 @@
    /* for the .stabs reader */
    Int    curr_filenmoff;
    Addr   curr_fnbaseaddr;
-   Char*  curr_file_name;
+   Char  *curr_file_name, *curr_fn_name;
    Int    n_stab_entries;
+   Int    prev_lineno, lineno;
+   Int    lineno_overflows;
+   Bool   same_file;
 
    oimage = (Addr)NULL;
    if (VG_(clo_verbosity) > 1)
-      VG_(message)(Vg_UserMsg, "Reading syms from %s", 
-                               si->filename );
+      VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
 
    /* mmap the object image aboard, so that we can read symbols and
       line number info out of it.  It will be munmapped immediately
@@ -905,7 +947,10 @@
    */
    curr_filenmoff  = addStr(si,"???");
    curr_fnbaseaddr = (Addr)NULL;
-   curr_file_name  = (Char*)NULL;
+   curr_file_name = curr_fn_name = (Char*)NULL;
+   lineno = prev_lineno = 0;
+   lineno_overflows = 0;
+   same_file = True;
 
    n_stab_entries = stab_sz/(int)sizeof(struct nlist);
 
@@ -920,21 +965,45 @@
       VG_(printf)("\n");
 #     endif
 
+      Char *no_fn_name = "???";
+
       switch (stab[i].n_type) {
          UInt next_addr;
 
-         /* To compute the instr address range covered by a single line, find
-          * the address of the next thing and compute the difference.  The
-          * approach used depends on what kind of entry/entries follow... */
+         /* Two complicated things here:
+          * 1. the n_desc field in 'struct nlist' in a.out.h is only 16 bits,
+          *    which gives a maximum of 65535 lines.  We handle files bigger
+          *    than this by heuristically detecting overflows -- if the line
+          *    count goes from 65000-odd to 0-odd within the same file, we
+          *    assume it's an overflow.  Once we switch files, we zero the
+          *    overflow count.
+          *
+          * 2. To compute the instr address range covered by a single line,
+          *    find the address of the next thing and compute the difference.
+          *    The approach used depends on what kind of entry/entries
+          *    follow... 
+          */
          case N_SLINE: {
-            Int lineno = stab[i].n_desc;              
             Int this_addr = (UInt)stab[i].n_value;
 
+            /* Although stored as a short, neg values really are > 32768, hence
+             * the UShort cast.  Then we use an Int to handle overflows. */
+            prev_lineno = lineno;
+            lineno      = (Int)((UShort)stab[i].n_desc);
+
+            if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
+                VG_(message)(Vg_DebugMsg, 
+                             "Line number overflow detected (%d --> %d) in %s", 
+                             prev_lineno, lineno, curr_file_name);
+                lineno_overflows++;
+            }
+            same_file = True;
+
             LOOP:
             if (i+1 >= n_stab_entries) {
                /* If it's the last entry, just guess the range is four;  can't
                 * do any better */
-               next_addr = 4;
+               next_addr = this_addr + 4;
             } else {    
                switch (stab[i+1].n_type) {
                   /* Easy, common case: use address of next entry */
@@ -953,9 +1022,11 @@
                      if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
                         next_addr = (UInt)stab[i+1].n_value;
                      } else {
-                        VG_(printf)("unhandled stabs case: N_FUN start %d %s\n",
-                                   i, (stabstr + stab[i+1].n_un.n_strx) );
-                        VG_(panic)("unhandled N_FUN stabs case");
+                        VG_(message)(Vg_DebugMsg, 
+                                     "warning: function %s missing closing "
+                                     "N_FUN stab at entry %d",
+                                     curr_fn_name, i );
+                        next_addr = this_addr;  /* assume zero-size loc */
                      }
                      break;
 
@@ -977,18 +1048,9 @@
                }
             }
             
-            //Int offset2 = (i+1 < n_stab_entries && 68 == stab[i+1].n_type
-            //              ? (UInt)stab[i+1].n_value - 1
-            //              : offset + 1);
-            //if (i+1 < n_stab_entries) {
-            //    int x;
-            //    if (68 != (x = stab[i+1].n_type)) {
-            //        VG_(printf)("%d  ", x);
-            //    }
-            //}
-
             addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr, 
-                          curr_fnbaseaddr + next_addr - 1, lineno );
+                          curr_fnbaseaddr + next_addr,
+                          lineno + lineno_overflows * LINENO_OVERFLOW, i);
             break;
          }
 
@@ -996,11 +1058,22 @@
             if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
                /* N_FUN with a name -- indicates the start of a fn.  */
                curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
+               curr_fn_name = stabstr + stab[i].n_un.n_strx;
+            } else {
+               curr_fn_name = no_fn_name;
             }
             break;
          }
 
-         case N_SO: case N_SOL:
+         case N_SOL:
+            if (lineno_overflows != 0) {
+               VG_(panic)("Can't currently handle include files in very long "
+                          "(> 65535 lines) files.  Sorry.");
+            }
+            /* fall through! */
+         case N_SO: 
+            lineno_overflows = 0;
+
          /* seems to give lots of locations in header files */
          /* case 130: */ /* BINCL */
          { 
diff --git a/vg_cachesim.c b/vg_cachesim.c
index de4f6ef..bfa9171 100644
--- a/vg_cachesim.c
+++ b/vg_cachesim.c
@@ -47,7 +47,8 @@
 #define FN_NAME_LEN                       256
 #define BUF_LEN                           512
 #define COMMIFY_BUF_LEN                   128
-#define RESULTS_BUF                       128
+#define RESULTS_BUF_LEN                   128
+#define LINE_BUF_LEN                       64
 
 /*------------------------------------------------------------*/
 /*--- Output file related stuff                            ---*/
@@ -87,15 +88,14 @@
  * vg_include.c 
  *
  * WARNING:  the 'tag' field *must* be the first byte of both CC types.
- *           the 'instr_addr' *must* be the second word of both CC types.
  *
- * This is because we use them when we don't know what type of CC we're dealing
- * with.
+ * This is because we use it to work out what kind of CC we're dealing with.
  */ 
 struct _iCC {
    /* word 1 */
    UChar tag;
    UChar instr_size;
+   /* 2 bytes padding */
 
    /* words 2+ */
    Addr instr_addr;
@@ -107,6 +107,7 @@
    UChar tag;
    UChar instr_size;
    UChar data_size;
+   /* 1 byte padding */
 
    /* words 2+ */
    Addr instr_addr;
@@ -133,25 +134,24 @@
    initCC(&cc->D);
 }
 
-static __inline__ void sprint_iCC(Char buf[BUF_LEN], UInt ln, iCC* cc)
+static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2);
 }
 
-static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], UInt ln, 
-                                             idCC* cc)
+static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2, 
-                          cc->D.a, cc->D.m1, cc->D.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2, 
+                      cc->D.a, cc->D.m1, cc->D.m2);
 }
 
-static __inline__ void sprint_write_CC(Char buf[BUF_LEN], UInt ln, idCC* cc)
+static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
 {
-   VG_(sprintf)(buf, "%u %llu %llu %llu . . . %llu %llu %llu\n",
-                      ln, cc->I.a, cc->I.m1, cc->I.m2, 
-                          cc->D.a, cc->D.m1, cc->D.m2);
+   VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
+                      cc->I.a, cc->I.m1, cc->I.m2, 
+                      cc->D.a, cc->D.m1, cc->D.m2);
 }
 
 /*------------------------------------------------------------*/
@@ -211,14 +211,13 @@
       BBCC_table[i] = NULL;
 }
 
-static void get_file_fn_names(Addr instr_addr, Char filename[FILENAME_LEN],
-                       Char fn_name[FN_NAME_LEN])
+static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
+                           Char fn_name[FN_NAME_LEN], Int* line_num)
 {
-   UInt dummy_line_num;
    Bool found1, found2, no_demangle = False;
 
    found1 = VG_(what_line_is_this)(instr_addr, filename,
-                                   FILENAME_LEN, &dummy_line_num);
+                                   FILENAME_LEN, line_num);
    found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, FN_NAME_LEN);
 
    if (!found1 && !found2) {
@@ -304,8 +303,9 @@
    BBCC      *curr_BBCC;
    Char       filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
    UInt       filename_hash, fnname_hash, BBCC_hash;
+   Int        dummy_line_num;
 
-   get_file_fn_names(bb_orig_addr, filename, fn_name);
+   get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
 
    VGP_PUSHCC(VgpCacheGetBBCC);
    filename_hash = hash(filename, N_FILE_ENTRIES);
@@ -684,7 +684,7 @@
       }
    }
    VG_(close)(fd);
-    
+
    initCC(&Ir_total);
    initCC(&Dr_total);
    initCC(&Dw_total);
@@ -723,30 +723,17 @@
 /*--- Printing of output file and summary stats            ---*/
 /*------------------------------------------------------------*/
 
-int get_line_num(Addr instr_addr) 
-{
-   Char filename[FILENAME_LEN] = "???";
-   UInt line_num;
-   Bool found;
-
-   found = VG_(what_line_is_this)(instr_addr, filename,
-                                  FILENAME_LEN, &line_num);
-   if (!found) {
-      line_num = 0; 
-   }
-   return line_num;
-}
-
 static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl, 
                                                  Char *first_instr_fn)
 {
    Addr BBCC_ptr0, BBCC_ptr;
-   Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4];
+   Char buf[BUF_LEN], curr_file[BUF_LEN], fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN];
    UInt line_num;
 
    BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
 
-   VG_(write)(fd, (void*)"\n", 1);
+   /* Mark start of basic block in output, just to ease debugging */
+   VG_(write)(fd, (void*)"\n", 1);  
 
    VG_(strcpy)(curr_file, first_instr_fl);
    
@@ -759,18 +746,7 @@
       Char fl_buf[FILENAME_LEN];
       Char fn_buf[FN_NAME_LEN];
 
-      /* Assumes instr_addr position is same for both CCs. */
-      Addr instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
-      get_file_fn_names(instr_addr, fl_buf, fn_buf);
-
-      /* Allow for filename switching in the middle of a BB;  if this happens,
-       * must print the new filename with the function name. */
-      if (0 != strcmp(fl_buf, curr_file)) {
-         VG_(strcpy)(curr_file, fl_buf);
-         VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
-         VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
-      }
-
+      Addr instr_addr;
       switch ( ((iCC*)BBCC_ptr)->tag ) {
 
 #define ADD_CC_TO(CC_type, cc, total)           \
@@ -779,24 +755,24 @@
    total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
           
          case INSTR_CC:
-            line_num = get_line_num(((iCC*)BBCC_ptr)->instr_addr);
-            sprint_iCC(buf, line_num, (iCC*)BBCC_ptr);
+            instr_addr = ((iCC*)BBCC_ptr)->instr_addr;
+            sprint_iCC(buf, (iCC*)BBCC_ptr);
             ADD_CC_TO(iCC, I, Ir_total);
             BBCC_ptr += sizeof(iCC);
             break;
 
          case READ_CC:
          case  MOD_CC:
-            line_num = get_line_num(((idCC*)BBCC_ptr)->instr_addr);
-            sprint_read_or_mod_CC(buf, line_num, (idCC*)BBCC_ptr);
+            instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
+            sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
             ADD_CC_TO(idCC, D, Dr_total);
             BBCC_ptr += sizeof(idCC);
             break;
 
          case WRITE_CC:
-            line_num = get_line_num(((idCC*)BBCC_ptr)->instr_addr);
-            sprint_write_CC(buf, line_num, (idCC*)BBCC_ptr);
+            instr_addr = ((idCC*)BBCC_ptr)->instr_addr;
+            sprint_write_CC(buf, (idCC*)BBCC_ptr);
             ADD_CC_TO(idCC, I, Ir_total);
             ADD_CC_TO(idCC, D, Dw_total);
             BBCC_ptr += sizeof(idCC);
@@ -810,19 +786,31 @@
       }
       distinct_instrs++;
       
+      get_debug_info(instr_addr, fl_buf, fn_buf, &line_num);
+
+      /* Allow for filename switching in the middle of a BB;  if this happens,
+       * must print the new filename with the function name. */
+      if (0 != strcmp(fl_buf, curr_file)) {
+         VG_(strcpy)(curr_file, fl_buf);
+         VG_(sprintf)(fbuf, "fi=%s\n", curr_file);
+         VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
+      }
+
       /* If the function name for this instruction doesn't match that of the
-       * first instruction in the BB, print out a warning. */
+       * first instruction in the BB, print warning. */
       if (VG_(clo_trace_symtab) && 0 != strcmp(fn_buf, first_instr_fn)) {
          VG_(printf)("Mismatched function names\n");
-         VG_(printf)("  filenames: BB:%s, instr:%s;  "
-                     "fn_names:  BB:%s, instr:%s;  "
-                     "line: %d\n", 
+         VG_(printf)("  filenames: BB:%s, instr:%s;"
+                     "  fn_names:  BB:%s, instr:%s;"
+                     "  line: %d\n", 
                      first_instr_fl, fl_buf, 
                      first_instr_fn, fn_buf, 
                      line_num);
       }
 
-      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+      VG_(sprintf)(lbuf, "%u ", line_num);
+      VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf));   /* line number */
+      VG_(write)(fd, (void*)buf, VG_(strlen)(buf));     /* cost centre */
    }
    /* If we switched filenames in the middle of the BB without switching back,
     * switch back now because the subsequent BB may be relying on falling under
@@ -831,7 +819,9 @@
       VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl);
       VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf));
    }
-   //VG_(write)(fd, (void*)"#}\n", 3);
+
+   /* Mark end of basic block */
+   /* VG_(write)(fd, (void*)"#}\n", 3); */
 
    vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size);
 }
@@ -961,9 +951,9 @@
    CC D_total;
    ULong L2_total_m, L2_total_mr, L2_total_mw,
          L2_total, L2_total_r, L2_total_w;
-   char buf1[RESULTS_BUF], 
-        buf2[RESULTS_BUF], 
-        buf3[RESULTS_BUF];
+   char buf1[RESULTS_BUF_LEN], 
+        buf2[RESULTS_BUF_LEN], 
+        buf3[RESULTS_BUF_LEN];
    Int l1, l2, l3;
    Int p;
 
diff --git a/vg_symtab2.c b/vg_symtab2.c
index 25d0ce6..c269b77 100644
--- a/vg_symtab2.c
+++ b/vg_symtab2.c
@@ -77,15 +77,34 @@
    }
    RiSym;
 
+/* Line count at which overflow happens, due to line numbers being stored as
+ * shorts in `struct nlist' in a.out.h. */
+#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
 
-/* A structure to hold addr-to-source info for a single line. */
+#define LINENO_BITS     20
+#define LOC_SIZE_BITS  (32 - LINENO_BITS)
+#define MAX_LINENO     (1 << LINENO_BITS)
+
+/* Unlikely to have any lines with instruction ranges > 4096 bytes */
+#define MAX_LOC_SIZE   (1 << LOC_SIZE_BITS)
+
+/* Number used to detect line number overflows;  if one line is 60000-odd
+ * smaller than the previous, it was probably an overflow.
+ */
+#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
+
+/* A structure to hold addr-to-source info for a single line.  There can be a
+ * lot of these, hence the dense packing. */
 typedef
    struct {
-      Addr   addr;   /* lowest address for this line */
-      Int    fnmoff; /* source filename; offset in this SegInfo's str tab */
-      UShort lineno; /* source line number, or zero */
-      UShort size;   /* size in bytes; we go to a bit of trouble to
-                        catch overflows of this */
+      /* Word 1 */
+      Addr   addr;                  /* lowest address for this line */
+      /* Word 2 */
+      UShort size:LOC_SIZE_BITS;    /* byte size; we catch overflows of this */
+      UInt   lineno:LINENO_BITS;    /* source line number, or zero */
+      /* Word 3 */
+      UInt   fnmoff;                /* source filename; offset in this 
+                                       SegInfo's str tab */
    }
    RiLoc;
 
@@ -215,8 +234,8 @@
    Int    new_sz, i;
    RiLoc* new_tab;
 
-   /* Ignore zero-sized locs. */
-   if (loc->size == 0) return;
+   /* Zero-sized locs should have been ignored earlier */
+   vg_assert(loc->size > 0);
 
    if (si->loctab_used == si->loctab_size) {
       new_sz = 2 * si->loctab_size;
@@ -547,20 +566,41 @@
 static __inline__
 void addLineInfo ( SegInfo* si,
                    Int      fnmoff,
-                   Addr     start,
-                   Addr     end,
-                   UInt     lineno )
+                   Addr     this,     /* lowest address of the line */
+                   Addr     next,     /* address of the next thing (exclusive) */
+                   Int      lineno,   /* source line, must be < MAX_LINENO */
+                   Int      entry )   /* stab index, used in warnings only */
 {
    RiLoc loc;
-   UInt size = end - start + 1;
+   Int size = next - this;
 
-   /* Sanity ... */
-   if (size > 10000) return;
+   /* Ignore zero-sized locs */
+   if (this == next) return;
 
-   if (start >= si->start+si->size 
-       || end < si->start) return;
+   /* Maximum sanity checking.  Some versions of GNU as do a shabby job with
+    * stabs entries;  if anything looks suspicious, revert to a size of 1.
+    * This should catch the instruction of interest (since if using asm-level
+    * debug info, one instruction will correspond to one line, unlike with
+    * C-level debug info where multiple instructions can map to the one line), 
+    * but avoid catching any other instructions bogusly. */
+   if (this > next) {
+       VG_(message)(Vg_DebugMsg, 
+                    "warning: stabs addresses out of order "
+                    "at entry %d: 0x%x 0x%x", entry, this, next);
+       size = 1;
+   }
 
-   loc.addr      = start;
+   /* loc.size has LOC_SIZE_BITS bits: MAX_LOC_SIZE itself would wrap to 0 */
+   if (size >= MAX_LOC_SIZE) {
+       VG_(message)(Vg_DebugMsg, 
+                    "warning: stabs line address range too large "
+                    "at entry %d: %d", entry, size);
+       size = 1;
+   }
+   vg_assert(this < si->start + si->size && next-1 >= si->start);
+   vg_assert(lineno >= 0 && lineno < MAX_LINENO);
+
+   loc.addr      = this;
    loc.size      = (UShort)size;
    loc.lineno    = lineno;
    loc.fnmoff    = fnmoff;
@@ -590,13 +630,15 @@
    /* for the .stabs reader */
    Int    curr_filenmoff;
    Addr   curr_fnbaseaddr;
-   Char*  curr_file_name;
+   Char  *curr_file_name, *curr_fn_name;
    Int    n_stab_entries;
+   Int    prev_lineno, lineno;
+   Int    lineno_overflows;
+   Bool   same_file;
 
    oimage = (Addr)NULL;
    if (VG_(clo_verbosity) > 1)
-      VG_(message)(Vg_UserMsg, "Reading syms from %s", 
-                               si->filename );
+      VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename );
 
    /* mmap the object image aboard, so that we can read symbols and
       line number info out of it.  It will be munmapped immediately
@@ -905,7 +947,10 @@
    */
    curr_filenmoff  = addStr(si,"???");
    curr_fnbaseaddr = (Addr)NULL;
-   curr_file_name  = (Char*)NULL;
+   curr_file_name = curr_fn_name = (Char*)NULL;
+   lineno = prev_lineno = 0;
+   lineno_overflows = 0;
+   same_file = True;
 
    n_stab_entries = stab_sz/(int)sizeof(struct nlist);
 
@@ -920,21 +965,45 @@
       VG_(printf)("\n");
 #     endif
 
+      Char *no_fn_name = "???";
+
       switch (stab[i].n_type) {
          UInt next_addr;
 
-         /* To compute the instr address range covered by a single line, find
-          * the address of the next thing and compute the difference.  The
-          * approach used depends on what kind of entry/entries follow... */
+         /* Two complicated things here:
+          * 1. the n_desc field in 'struct nlist' in a.out.h is only 16-bits,
+          *    which gives a maximum of 65535 lines.  We handle files bigger
+          *    than this by heuristically detecting overflows -- if the line
+          *    count goes from 65000-odd to 0-odd within the same file, we
+          *    assume it's an overflow.  Once we switch files, we zero the
+          *    overflow count.
+          *
+          * 2. To compute the instr address range covered by a single line,
+          *    find the address of the next thing and compute the difference.
+          *    The approach used depends on what kind of entry/entries
+          *    follow... 
+          */
          case N_SLINE: {
-            Int lineno = stab[i].n_desc;              
             Int this_addr = (UInt)stab[i].n_value;
 
+            /* Although stored as a short, neg values really are >= 32768,
+             * hence the UShort cast.  Then we use an Int to handle overflows. */
+            prev_lineno = lineno;
+            lineno      = (Int)((UShort)stab[i].n_desc);
+
+            if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) {
+                VG_(message)(Vg_DebugMsg, 
+                             "Line number overflow detected (%d --> %d) in %s", 
+                             prev_lineno, lineno, curr_file_name);
+                lineno_overflows++;
+            }
+            same_file = True;
+
             LOOP:
             if (i+1 >= n_stab_entries) {
                /* If it's the last entry, just guess the range is four;  can't
                 * do any better */
-               next_addr = 4;
+               next_addr = this_addr + 4;
             } else {    
                switch (stab[i+1].n_type) {
                   /* Easy, common case: use address of next entry */
@@ -953,9 +1022,11 @@
                      if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) {
                         next_addr = (UInt)stab[i+1].n_value;
                      } else {
-                        VG_(printf)("unhandled stabs case: N_FUN start %d %s\n",
-                                   i, (stabstr + stab[i+1].n_un.n_strx) );
-                        VG_(panic)("unhandled N_FUN stabs case");
+                        VG_(message)(Vg_DebugMsg, 
+                                     "warning: function %s missing closing "
+                                     "N_FUN stab at entry %d",
+                                     curr_fn_name, i );
+                        next_addr = this_addr;  /* assume zero-size loc */
                      }
                      break;
 
@@ -977,18 +1048,9 @@
                }
             }
             
-            //Int offset2 = (i+1 < n_stab_entries && 68 == stab[i+1].n_type
-            //              ? (UInt)stab[i+1].n_value - 1
-            //              : offset + 1);
-            //if (i+1 < n_stab_entries) {
-            //    int x;
-            //    if (68 != (x = stab[i+1].n_type)) {
-            //        VG_(printf)("%d  ", x);
-            //    }
-            //}
-
             addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr, 
-                          curr_fnbaseaddr + next_addr - 1, lineno );
+                          curr_fnbaseaddr + next_addr,
+                          lineno + lineno_overflows * LINENO_OVERFLOW, i);
             break;
          }
 
@@ -996,11 +1058,22 @@
             if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) {
                /* N_FUN with a name -- indicates the start of a fn.  */
                curr_fnbaseaddr = si->offset + (Addr)stab[i].n_value;
+               curr_fn_name = stabstr + stab[i].n_un.n_strx;
+            } else {
+               curr_fn_name = no_fn_name;
             }
             break;
          }
 
-         case N_SO: case N_SOL:
+         case N_SOL:
+            if (lineno_overflows != 0) {
+               VG_(panic)("Can't currently handle include files in very long "
+                          "(> 65535 lines) files.  Sorry.");
+            }
+            /* fall through! */
+         case N_SO: 
+            lineno_overflows = 0;
+
          /* seems to give lots of locations in header files */
          /* case 130: */ /* BINCL */
          {