module: gem5stats: enable asynchronous origins in gem5 stats file

Currently, reset_origin() in the gem5stats module enables to virtually
truncate the existing dumps from the gem5 statistics file so that any
subsequent match() calls will apply only on new dumps. However, the
current implementation resets the origin of the stats file unilaterally,
without other clients knowing about it, hence leading to data being
lost.

This commits removes the reset_origin() method and provides a different
API to handle virtual truncatures in the stats file. Namely, the match()
method now takes a base_dump parameter letting clients specifiying from
which dump they want match() to apply. This feature could also be usefull
if someone wanted to add off-line processing features for statistics
files.
diff --git a/devlib/instrument/gem5power.py b/devlib/instrument/gem5power.py
index 6411b3f..7715ec1 100644
--- a/devlib/instrument/gem5power.py
+++ b/devlib/instrument/gem5power.py
@@ -52,6 +52,7 @@
         self.target.gem5stats.book_roi(self.roi_label)
         self.sample_period_ns = 10000000
         self.target.gem5stats.start_periodic_dump(0, self.sample_period_ns)
+        self._base_stats_dump = 0
 
     def start(self):
         self.target.gem5stats.roi_start(self.roi_label)
@@ -64,11 +65,12 @@
         with open(outfile, 'wb') as wfh:
             writer = csv.writer(wfh)
             writer.writerow([c.label for c in self.active_channels]) # headers
-            for rec, rois in self.target.gem5stats.match_iter(active_sites, [self.roi_label]):
+            for rec, rois in self.target.gem5stats.match_iter(active_sites, 
+                    [self.roi_label], self._base_stats_dump):
                 writer.writerow([float(rec[s]) for s in active_sites])
         return MeasurementsCsv(outfile, self.active_channels)
     
     def reset(self, sites=None, kinds=None, channels=None):
         super(Gem5PowerInstrument, self).reset(sites, kinds, channels)
-        self.target.gem5stats.reset_origin()
+        self._base_stats_dump = self.target.gem5stats.next_dump_no()
 
diff --git a/devlib/module/gem5stats.py b/devlib/module/gem5stats.py
index f919905..9109751 100644
--- a/devlib/module/gem5stats.py
+++ b/devlib/module/gem5stats.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import sys
 import logging
 import os.path
 from collections import defaultdict
@@ -66,6 +67,7 @@
         self._stats_file_path = os.path.join(target.platform.gem5_out_dir,
                                             'stats.txt')
         self.rois = {}
+        self._dump_pos_cache = {0: 0}
 
     def book_roi(self, label):
         if label in self.rois:
@@ -103,27 +105,27 @@
             raise ValueError(msg.format(delay_ns, period_ns))
         self.target.execute('m5 dumpresetstats {} {}'.format(delay_ns, period_ns))
     
-    def match(self, keys, rois_labels):
+    def match(self, keys, rois_labels, base_dump=0):
         '''
         Tries to match the list of keys passed as parameter over the statistics
-        dumps covered by selected ROIs since origin. Returns a dict indexed by 
-        key parameters containing a dict indexed by ROI labels containing an 
-        in-order list of records for the key under consideration during the 
-        active intervals of the ROI.
+        dumps covered by selected ROIs since ``base_dump``. Returns a dict 
+        indexed by key parameters containing a dict indexed by ROI labels 
+        containing an in-order list of records for the key under consideration
+        during the active intervals of the ROI.
 
         Keys must match fields in gem5's statistics log file. Key example:
             system.cluster0.cores0.power_model.static_power
         '''
         records = defaultdict(lambda : defaultdict(list))
-        for record, active_rois in self.match_iter(keys, rois_labels):
+        for record, active_rois in self.match_iter(keys, rois_labels, base_dump):
             for key in record:
                 for roi_label in active_rois:
                     records[key][roi_label].append(record[key])
         return records
 
-    def match_iter(self, keys, rois_labels):
+    def match_iter(self, keys, rois_labels, base_dump=0):
         '''
-        Yields for each dump since origin a pair containing:
+        Yields for each dump since ``base_dump`` a pair containing:
         1. a dict storing the values corresponding to each of the specified keys
         2. the list of currently active ROIs among those passed as parameters.
 
@@ -142,24 +144,50 @@
             return (roi.field in dump) and (int(dump[roi.field]) == 1)
 
         with open(self._stats_file_path, 'r') as stats_file:
-            stats_file.seek(self._current_origin)
+            self._goto_dump(stats_file, base_dump)
             for dump in iter_statistics_dump(stats_file):
                 active_rois = [l for l in rois_labels if roi_active(l, dump)]
                 if active_rois:
                     record = {k: dump[k] for k in keys}
                     yield (record, active_rois)
 
-
-    def reset_origin(self):
+    def next_dump_no(self):
         '''
-        Place origin right after the last full dump in the file
+        Returns the number of the next dump to be written to the stats file.
+        
+        For example, if next_dump_no is called while there are 5 (0 to 4) full 
+        dumps in the stats file, it will return 5. This will be usefull to know
+        from which dump one should match() in the future to get only data from
+        now on.
         '''
-        last_dump_tail = self._current_origin
-        # Dump & reset stats to start from a fresh state
-        self.target.execute('m5 dumpresetstats')
         with open(self._stats_file_path, 'r') as stats_file:
-            for line in stats_file:
-                if GEM5STATS_DUMP_TAIL in line:
-                    last_dump_tail = stats_file.tell()
-        self._current_origin = last_dump_tail
+            # _goto_dump reach EOF and returns the total number of dumps + 1
+            return self._goto_dump(stats_file, sys.maxint)
+    
+    def _goto_dump(self, stats_file, target_dump):
+        if target_dump < 0:
+            raise HostError('Cannot go to dump {}'.format(target_dump))
+
+        # Go to required dump quickly if it was visited before
+        if target_dump in self._dump_pos_cache:
+            stats_file.seek(self._dump_pos_cache[target_dump])
+            return target_dump
+        # Or start from the closest dump already visited before the required one
+        prev_dumps = filter(lambda x: x < target_dump, self._dump_pos_cache.keys())
+        curr_dump = max(prev_dumps)
+        curr_pos = self._dump_pos_cache[curr_dump]
+        stats_file.seek(curr_pos)
+        
+        # And iterate until target_dump
+        dump_iterator = iter_statistics_dump(stats_file)
+        while curr_dump < target_dump:
+            try:
+                dump = dump_iterator.next()
+            except StopIteration:
+                break
+            # End of passed dump is beginning og next one
+            curr_pos = stats_file.tell()
+            curr_dump += 1
+        self._dump_pos_cache[curr_dump] = curr_pos
+        return curr_dump