Merge pull request #161 from qperret/gem5/stats/match-regex
module/gem5stats: enhance match() with regex support
diff --git a/devlib/instrument/gem5power.py b/devlib/instrument/gem5power.py
index d265440..4b145d9 100644
--- a/devlib/instrument/gem5power.py
+++ b/devlib/instrument/gem5power.py
@@ -72,7 +72,7 @@
sites_to_match = [self.site_mapping.get(s, s) for s in active_sites]
for rec, rois in self.target.gem5stats.match_iter(sites_to_match,
[self.roi_label], self._base_stats_dump):
- writer.writerow([float(rec[s]) for s in active_sites])
+ writer.writerow([rec[s] for s in active_sites])
return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
def reset(self, sites=None, kinds=None, channels=None):
diff --git a/devlib/module/gem5stats.py b/devlib/module/gem5stats.py
index 9109751..0f0fbd7 100644
--- a/devlib/module/gem5stats.py
+++ b/devlib/module/gem5stats.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import re
import sys
import logging
import os.path
@@ -107,14 +108,55 @@
def match(self, keys, rois_labels, base_dump=0):
'''
- Tries to match the list of keys passed as parameter over the statistics
- dumps covered by selected ROIs since ``base_dump``. Returns a dict
- indexed by key parameters containing a dict indexed by ROI labels
- containing an in-order list of records for the key under consideration
- during the active intervals of the ROI.
+ Extract specific values from the statistics log file of gem5
- Keys must match fields in gem5's statistics log file. Key example:
- system.cluster0.cores0.power_model.static_power
+ :param keys: a list of key names or regular expression patterns that
+ will be matched against the fields of the statistics file. ``match()``
+ returns only the values of fields matching at least one of these
+ keys.
+ :type keys: list
+
+ :param rois_labels: list of ROI labels. ``match()`` returns the
+ values of the specified fields only during dumps spanned by at
+ least one of these ROIs.
+ :type rois_labels: list
+
+ :param base_dump: dump number from which ``match()`` should operate. By
+ specifying a non-zero dump number, one can virtually truncate
+ the head of the stats file and ignore all dumps before a specific
+ instant. The value of ``base_dump`` will typically (but not
+ necessarily) be the result of a previous call to ``next_dump_no``.
+ Default value is 0.
+ :type base_dump: int
+
+ :returns: a dict indexed by the names of the matched fields, each
+ containing a dict indexed by ROI labels containing an in-order list of
+ records for that field during the active intervals of the ROI.
+
+ Example of return value:
+ * Result of match(['sim_'],['roi_1']):
+ {
+ 'sim_inst':
+ {
+ 'roi_1': [265300176, 267975881]
+ }
+ 'sim_ops':
+ {
+ 'roi_1': [324395787, 327699419]
+ }
+ 'sim_seconds':
+ {
+ 'roi_1': [0.199960, 0.199897]
+ }
+ 'sim_freq':
+ {
+ 'roi_1': [1000000000000, 1000000000000]
+ }
+ 'sim_ticks':
+ {
+ 'roi_1': [199960234227, 199896897330]
+ }
+ }
'''
records = defaultdict(lambda : defaultdict(list))
for record, active_rois in self.match_iter(keys, rois_labels, base_dump):
@@ -125,12 +167,27 @@
def match_iter(self, keys, rois_labels, base_dump=0):
'''
- Yields for each dump since ``base_dump`` a pair containing:
- 1. a dict storing the values corresponding to each of the specified keys
- 2. the list of currently active ROIs among those passed as parameters.
+ Yield specific values dump-by-dump from the statistics log file of gem5
- Keys must match fields in gem5's statistics log file. Key example:
- system.cluster0.cores0.power_model.static_power
+ :param keys: same as ``match()``
+ :param rois_labels: same as ``match()``
+ :param base_dump: same as ``match()``
+ :returns: a pair containing:
+ 1. a dict storing the values corresponding to each of the found keys
+ 2. the list of currently active ROIs among those passed as parameters
+
+ Example of return value:
+ * Result of match_iter(['sim_'],['roi_1', 'roi_2']).next()
+ (
+ {
+ 'sim_inst': 265300176,
+ 'sim_ops': 324395787,
+ 'sim_seconds': 0.199960,
+ 'sim_freq': 1000000000000,
+ 'sim_ticks': 199960234227,
+ },
+ [ 'roi_1' ]
+ )
'''
for label in rois_labels:
if label not in self.rois:
@@ -139,6 +196,10 @@
self.logger.warning('Trying to match records in statistics file'
' while ROI {} is running'.format(label))
+ # Construct one large regex that joins all keys with '|' because
+ # matching one large expression is more efficient than several smaller ones
+ all_keys_re = re.compile('|'.join(keys))
+
def roi_active(roi_label, dump):
roi = self.rois[roi_label]
return (roi.field in dump) and (int(dump[roi.field]) == 1)
@@ -148,8 +209,8 @@
for dump in iter_statistics_dump(stats_file):
active_rois = [l for l in rois_labels if roi_active(l, dump)]
if active_rois:
- record = {k: dump[k] for k in keys}
- yield (record, active_rois)
+ rec = {k: dump[k] for k in dump if all_keys_re.search(k)}
+ yield (rec, active_rois)
def next_dump_no(self):
'''
diff --git a/devlib/utils/gem5.py b/devlib/utils/gem5.py
index c609d70..0ca42ec 100644
--- a/devlib/utils/gem5.py
+++ b/devlib/utils/gem5.py
@@ -13,6 +13,9 @@
# limitations under the License.
import re
+import logging
+
+from devlib.utils.types import numeric
GEM5STATS_FIELD_REGEX = re.compile("^(?P<key>[^- ]\S*) +(?P<value>[^#]+).+$")
@@ -20,6 +23,8 @@
GEM5STATS_DUMP_TAIL = '---------- End Simulation Statistics ----------'
GEM5STATS_ROI_NUMBER = 8
+logger = logging.getLogger('gem5')
+
def iter_statistics_dump(stats_file):
'''
@@ -38,6 +43,11 @@
res = GEM5STATS_FIELD_REGEX.match(line)
if res:
k = res.group("key")
- v = res.group("value").split()
- cur_dump[k] = v[0] if len(v)==1 else set(v)
+ vtext = res.group("value")
+ try:
+ v = map(numeric, vtext.split())
+ cur_dump[k] = v[0] if len(v)==1 else set(v)
+ except ValueError:
+ msg = 'Found non-numeric entry in gem5 stats ({}: {})'
+ logger.warning(msg.format(k, vtext))