module: Control and parse gem5's statistics log file

Gem5's statistics log file contains plenty of interesting information
that is not exposed so far. This module enables control and parsing of
the statistics file by:
 - configuring periodic dumps of statistics;
 - marking Regions of Interest (ROIs);
 - and extracting values of specific fields during the ROIs.
diff --git a/devlib/module/gem5stats.py b/devlib/module/gem5stats.py
new file mode 100644
index 0000000..3d109aa
--- /dev/null
+++ b/devlib/module/gem5stats.py
@@ -0,0 +1,196 @@
+#    Copyright 2017 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os.path
+from collections import defaultdict
+
+import devlib
+from devlib.exception import TargetError
+from devlib.module import Module
+from devlib.platform import Platform
+from devlib.platform.gem5 import Gem5SimulationPlatform
+from devlib.utils.gem5 import iter_statistics_dump, GEM5STATS_ROI_NUMBER, GEM5STATS_DUMP_TAIL
+
+
+class Gem5ROI(object):
+    '''
+    A single gem5 Region of Interest (ROI) marker, switched on and off on the
+    target through the ``m5 roistart`` and ``m5 roiend`` commands.
+    '''
+
+    def __init__(self, number, target):
+        self.target = target
+        self.number = number
+        # Only reflects start()/stop() calls made through this object
+        self.running = False
+
+    def start(self):
+        '''
+        Switch the ROI on. Returns False without touching the target if the
+        ROI is already running, True otherwise.
+        '''
+        if self.running:
+            return False
+        self.target.execute('m5 roistart {}'.format(self.number))
+        self.running = True
+        return True
+
+    def stop(self):
+        '''
+        Switch the ROI off. Returns False without touching the target if the
+        ROI is not running, True otherwise.
+        '''
+        if not self.running:
+            return False
+        self.target.execute('m5 roiend {}'.format(self.number))
+        self.running = False
+        return True
+
+
+class Gem5StatsModule(Module):
+    '''
+    Module controlling Region of Interest (ROIs) markers, statistics dump
+    frequency and parsing statistics log file when using gem5 platforms.
+
+    ROIs are identified by user-defined labels and need to be booked prior to
+    use. The translation of labels into gem5 ROI numbers will be performed
+    internally in order to avoid conflicts between multiple clients.
+    '''
+    name = 'gem5stats'
+
+    @staticmethod
+    def probe(target):
+        return isinstance(target.platform, Gem5SimulationPlatform)
+
+    def __init__(self, target):
+        super(Gem5StatsModule, self).__init__(target)
+        # Offset in the statistics file from which match() starts parsing
+        self._current_origin = 0
+        self._stats_file_path = os.path.join(target.platform.gem5_out_dir,
+                                            'stats.txt')
+        # Booked ROIs, indexed by user-defined label
+        self.rois = {}
+
+    def book_roi(self, label):
+        '''
+        Reserve a free gem5 ROI number for the given label.
+
+        Raises KeyError if the label is already booked and RuntimeError if
+        all the GEM5STATS_ROI_NUMBER ROIs are already reserved.
+        '''
+        if label in self.rois:
+            raise KeyError('ROI label {} already used'.format(label))
+        if len(self.rois) >= GEM5STATS_ROI_NUMBER:
+            raise RuntimeError('Too many ROIs reserved')
+        used_rois = set(roi.number for roi in self.rois.values())
+        avail_rois = set(range(GEM5STATS_ROI_NUMBER)) - used_rois
+        # Always hand out the lowest free number to keep booking deterministic
+        self.rois[label] = Gem5ROI(min(avail_rois), self.target)
+
+    def free_roi(self, label):
+        '''
+        Release the ROI booked under the given label, stopping it first if it
+        is still running. Raises KeyError if the label is not booked.
+        '''
+        if label not in self.rois:
+            raise KeyError('ROI label {} not reserved yet'.format(label))
+        self.rois[label].stop()
+        del self.rois[label]
+
+    def roi_start(self, label):
+        '''
+        Start the ROI booked under the given label. Raises KeyError if the
+        label is not booked and TargetError if the ROI is already running.
+        '''
+        if label not in self.rois:
+            raise KeyError('Incorrect ROI label: {}'.format(label))
+        if not self.rois[label].start():
+            raise TargetError('ROI {} was already running'.format(label))
+
+    def roi_end(self, label):
+        '''
+        Stop the ROI booked under the given label. Raises KeyError if the
+        label is not booked and TargetError if the ROI is not running.
+        '''
+        if label not in self.rois:
+            raise KeyError('Incorrect ROI label: {}'.format(label))
+        if not self.rois[label].stop():
+            raise TargetError('ROI {} was not running'.format(label))
+
+    def start_periodic_dump(self, delay_ns=0, period_ns=10000000):
+        '''
+        Configure periodic dumps (and resets) of the statistics by passing
+        the delay and period to ``m5 dumpresetstats``. Raises ValueError if
+        either of them is negative.
+        '''
+        # Default period is 10ms because it's roughly what's needed to have
+        # accurate power estimations
+        if delay_ns < 0 or period_ns < 0:
+            msg = 'Delay ({}) and period ({}) for periodic dumps must not be negative'
+            raise ValueError(msg.format(delay_ns, period_ns))
+        self.target.execute('m5 dumpresetstats {} {}'.format(delay_ns, period_ns))
+
+    def match(self, keys, rois_labels):
+        '''
+        Tries to match the list of keys passed as parameter over the statistics
+        dumps covered by selected ROIs since origin. Returns a dict indexed by
+        key parameters containing a dict indexed by ROI labels containing an
+        in-order list of records for the key under consideration during the
+        active intervals of the ROI.
+
+        Keys must match fields in gem5's statistics log file. Key example:
+            system.cluster0.cores0.power_model.static_power
+
+        Raises KeyError if a label is not booked, or if a key is missing from
+        a dump recorded while one of the selected ROIs was active.
+        '''
+        for label in rois_labels:
+            if label not in self.rois:
+                raise KeyError('Impossible to match ROI label {}'.format(label))
+            if self.rois[label].running:
+                self.logger.warning('Trying to match records in statistics file'
+                        ' while ROI {} is running'.format(label))
+
+        # The ROI field names are loop-invariant: build them only once
+        roi_fields = [(label, 'ROI::{}'.format(self.rois[label].number))
+                      for label in rois_labels]
+        records = dict((key, defaultdict(list)) for key in keys)
+        with open(self._stats_file_path, 'r') as stats_file:
+            stats_file.seek(self._current_origin)
+            for dump in iter_statistics_dump(stats_file):
+                for label, roi_field in roi_fields:
+                    # Save records only when ROIs are ON
+                    if roi_field in dump and int(dump[roi_field]) == 1:
+                        for key in keys:
+                            records[key][label].append(dump[key])
+        return records
+
+    def reset_origin(self):
+        '''
+        Place origin right after the last full dump in the file
+        '''
+        last_dump_tail = self._current_origin
+        # Dump & reset stats to start from a fresh state
+        self.target.execute('m5 dumpresetstats')
+        with open(self._stats_file_path, 'r') as stats_file:
+            # Use readline() rather than iterating over the file: file
+            # iteration reads ahead, which makes tell() unreliable (Python 2)
+            # or unavailable (Python 3) inside the loop.
+            line = stats_file.readline()
+            while line:
+                if GEM5STATS_DUMP_TAIL in line:
+                    last_dump_tail = stats_file.tell()
+                line = stats_file.readline()
+        self._current_origin = last_dump_tail
diff --git a/devlib/utils/gem5.py b/devlib/utils/gem5.py
new file mode 100644
index 0000000..c609d70
--- /dev/null
+++ b/devlib/utils/gem5.py
@@ -0,0 +1,55 @@
+#    Copyright 2017 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+
+# Matches "<key> <value(s)> [# description]" lines. The value group stops at
+# the comment marker or at the end of the line, whichever comes first.
+GEM5STATS_FIELD_REGEX = re.compile(r"^(?P<key>[^- ]\S*) +(?P<value>[^#\n]+)")
+GEM5STATS_DUMP_HEAD = '---------- Begin Simulation Statistics ----------'
+GEM5STATS_DUMP_TAIL = '---------- End Simulation Statistics   ----------'
+# Number of ROIs that can be booked (ROI numbers 0 to GEM5STATS_ROI_NUMBER - 1)
+GEM5STATS_ROI_NUMBER = 8
+
+
+def iter_statistics_dump(stats_file):
+    '''
+    Yields statistics dumps as dicts. The parameter is assumed to be a stream
+    reading from the statistics log file.
+
+    Each dict maps a field name to its value: a string when the field holds a
+    single value, a set of strings when it holds several. A dump is yielded
+    when its tail marker is read, so a trailing incomplete dump is discarded.
+    '''
+    cur_dump = {}
+    # readline() (rather than file iteration) keeps the stream position in
+    # step with what has actually been parsed
+    while True:
+        line = stats_file.readline()
+        if not line:
+            break
+        if GEM5STATS_DUMP_TAIL in line:
+            yield cur_dump
+            cur_dump = {}
+            continue
+        res = GEM5STATS_FIELD_REGEX.match(line)
+        if not res:
+            continue
+        values = res.group("value").split()
+        if not values:
+            # Key without any value: nothing to record
+            continue
+        key = res.group("key")
+        cur_dump[key] = values[0] if len(values) == 1 else set(values)