Merge pull request #161 from qperret/gem5/stats/match-regex

module/gem5stats: enhance match() with regex support
diff --git a/devlib/instrument/gem5power.py b/devlib/instrument/gem5power.py
index d265440..4b145d9 100644
--- a/devlib/instrument/gem5power.py
+++ b/devlib/instrument/gem5power.py
@@ -72,7 +72,7 @@
             sites_to_match = [self.site_mapping.get(s, s) for s in active_sites]
             for rec, rois in self.target.gem5stats.match_iter(sites_to_match,
                     [self.roi_label], self._base_stats_dump):
-                writer.writerow([float(rec[s]) for s in active_sites])
+                writer.writerow([rec[s] for s in active_sites])
         return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
 
     def reset(self, sites=None, kinds=None, channels=None):
diff --git a/devlib/module/gem5stats.py b/devlib/module/gem5stats.py
index 9109751..0f0fbd7 100644
--- a/devlib/module/gem5stats.py
+++ b/devlib/module/gem5stats.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
 import sys
 import logging
 import os.path
@@ -107,14 +108,55 @@
     
     def match(self, keys, rois_labels, base_dump=0):
         '''
-        Tries to match the list of keys passed as parameter over the statistics
-        dumps covered by selected ROIs since ``base_dump``. Returns a dict 
-        indexed by key parameters containing a dict indexed by ROI labels 
-        containing an in-order list of records for the key under consideration
-        during the active intervals of the ROI.
+        Extract specific values from the statistics log file of gem5
 
-        Keys must match fields in gem5's statistics log file. Key example:
-            system.cluster0.cores0.power_model.static_power
+        :param keys: a list of key names or regular expression patterns that
+            will be matched in the fields of the statistics file. ``match()``
+            returns only the values of fields matching at least one of these
+            keys.
+        :type keys: list
+
+        :param rois_labels: list of ROIs labels. ``match()`` returns the 
+            values of the specified fields only during dumps spanned by at
+            least one of these ROIs.
+        :type rois_labels: list
+
+        :param base_dump: dump number from which ``match()`` should operate. By 
+            specifying a non-zero dump number, one can virtually truncate 
+            the head of the stats file and ignore all dumps before a specific
+            instant. The value of ``base_dump`` will typically (but not 
+            necessarily) be the result of a previous call to ``next_dump_no``.
+            Default value is 0.
+        :type base_dump: int
+
+        :returns: a dict indexed by key parameters containing a dict indexed by
+            ROI labels containing an in-order list of records for the key under
+            consideration during the active intervals of the ROI.
+        
+        Example of return value:
+         * Result of match(['sim_'],['roi_1']):
+            {
+                'sim_inst':
+                {
+                    'roi_1': [265300176, 267975881]
+                },
+                'sim_ops':
+                {
+                    'roi_1': [324395787, 327699419]
+                },
+                'sim_seconds':
+                {
+                    'roi_1': [0.199960, 0.199897]
+                },
+                'sim_freq':
+                {
+                    'roi_1': [1000000000000, 1000000000000]
+                },
+                'sim_ticks':
+                {
+                    'roi_1': [199960234227, 199896897330]
+                }
+            }
         '''
         records = defaultdict(lambda : defaultdict(list))
         for record, active_rois in self.match_iter(keys, rois_labels, base_dump):
@@ -125,12 +167,27 @@
 
     def match_iter(self, keys, rois_labels, base_dump=0):
         '''
-        Yields for each dump since ``base_dump`` a pair containing:
-        1. a dict storing the values corresponding to each of the specified keys
-        2. the list of currently active ROIs among those passed as parameters.
+        Yield specific values dump-by-dump from the statistics log file of gem5
 
-        Keys must match fields in gem5's statistics log file. Key example:
-            system.cluster0.cores0.power_model.static_power
+        :param keys: same as ``match()``
+        :param rois_labels: same as ``match()``
+        :param base_dump: same as ``match()``
+        :returns: a pair containing:
+            1. a dict storing the values corresponding to each of the found keys
+            2. the list of currently active ROIs among those passed as parameters
+
+        Example of return value:
+         * Result of match_iter(['sim_'],['roi_1', 'roi_2']).next()
+            ( 
+                { 
+                    'sim_inst': 265300176,
+                    'sim_ops': 324395787,
+                    'sim_seconds': 0.199960, 
+                    'sim_freq': 1000000000000,
+                    'sim_ticks': 199960234227,
+                },
+                [ 'roi_1' ]
+            )
         '''
         for label in rois_labels:
             if label not in self.rois:
@@ -139,6 +196,10 @@
                 self.logger.warning('Trying to match records in statistics file'
                         ' while ROI {} is running'.format(label))
         
+        # Construct one large regex that concatenates all keys because
+        # matching one large expression is more efficient than several smaller
+        all_keys_re = re.compile('|'.join(keys))
+        
         def roi_active(roi_label, dump):
             roi = self.rois[roi_label]
             return (roi.field in dump) and (int(dump[roi.field]) == 1)
@@ -148,8 +209,8 @@
             for dump in iter_statistics_dump(stats_file):
                 active_rois = [l for l in rois_labels if roi_active(l, dump)]
                 if active_rois:
-                    record = {k: dump[k] for k in keys}
-                    yield (record, active_rois)
+                    rec = {k: dump[k] for k in dump if all_keys_re.search(k)}
+                    yield (rec, active_rois)
 
     def next_dump_no(self):
         '''
diff --git a/devlib/utils/gem5.py b/devlib/utils/gem5.py
index c609d70..0ca42ec 100644
--- a/devlib/utils/gem5.py
+++ b/devlib/utils/gem5.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 
 import re
+import logging
+
+from devlib.utils.types import numeric
 
 
 GEM5STATS_FIELD_REGEX = re.compile("^(?P<key>[^- ]\S*) +(?P<value>[^#]+).+$")
@@ -20,6 +23,8 @@
 GEM5STATS_DUMP_TAIL = '---------- End Simulation Statistics   ----------'
 GEM5STATS_ROI_NUMBER = 8
 
+logger = logging.getLogger('gem5')
+
 
 def iter_statistics_dump(stats_file):
     '''
@@ -38,6 +43,11 @@
             res = GEM5STATS_FIELD_REGEX.match(line) 
             if res:
                 k = res.group("key")
-                v = res.group("value").split()
-                cur_dump[k] = v[0] if len(v)==1 else set(v)
+                vtext = res.group("value")
+                try:
+                    v = map(numeric, vtext.split())
+                    cur_dump[k] = v[0] if len(v)==1 else set(v)
+                except ValueError:
+                    msg = 'Found non-numeric entry in gem5 stats ({}: {})'
+                    logger.warning(msg.format(k, vtext))