# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import glob
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pylab as pl
import re
import sys
import trappy
# Configure logging
import logging
# Regexp to match an rt-app generated logfile
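# (e.g. '<datadir>/rt-app-mytask-0.log' yields the task name 'mytask')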
TASK_NAME_RE = re.compile(r'.*/rt-app-(.+)-[0-9]+\.log')
class PerfAnalysis(object):
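    """
    Parse the performance data logfiles generated by an rt-app workload.

    One logfile per task is expected in the specified data directory; for
    each task the per-activation metrics are loaded into a pandas dataframe
    which can be retrieved via df().
    """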
def __init__(self, datadir, tasks=None):
# Dataframe of all tasks performance data
self.perf_data = {}
# Folder containing all rt-app data
self.datadir = None
# Load performance data generated by rt-app workloads
self.__loadRTAData(datadir, tasks)
        # Check that at least one rt-app logfile has been found
        if len(self.perf_data) == 0:
            raise ValueError('No performance data found in folder [{0:s}]'\
                    .format(datadir))
        # Keep track of the datadir from which data has been loaded
        self.datadir = datadir
def __taskNameFromLog(self, logfile):
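        """
        Extract the task name from the name of an rt-app generated logfile.
        """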
tname_match = re.search(TASK_NAME_RE, logfile)
if tname_match is None:
raise ValueError('The logfile [{0:s}] is not from rt-app'\
.format(logfile))
return tname_match.group(1)
def __logfileFromTaskName(self, taskname):
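        """
        Return the path of the rt-app logfile for the specified task.
        """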
        for logfile in glob.glob(
                '{0:s}/rt-app-{1:s}-[0-9]*.log'.format(self.datadir, taskname)):
return logfile
raise ValueError('No rt-app logfile found for task [{0:s}]'\
.format(taskname))
def tasks(self):
"""
Return the list of tasks for which performance data have been loaded
"""
if self.datadir is None:
raise ValueError("rt-app performance data not (yet) loaded")
return self.perf_data.keys()
def logfile(self, task):
"""
Return the logfile for the specified task
"""
if task not in self.perf_data:
raise ValueError('No logfile loaded for task [{0:s}]'\
.format(task))
return self.perf_data[task]['logfile']
def df(self, task):
"""
Return the PANDAS dataframe with the performance data for the
specified task
"""
if self.datadir is None:
raise ValueError("rt-app performance data not (yet) loaded")
if task not in self.perf_data:
raise ValueError('No dataframe loaded for task [{0:s}]'\
.format(task))
return self.perf_data[task]['df']
def __loadRTAData(self, datadir, tasks):
"""
        Load performance data of an rt-app workload
"""
if tasks is None:
            # Find all rt-app logfiles in the specified datadir
for logfile in glob.glob('{0:s}/rt-app-*.log'.format(datadir)):
task_name = self.__taskNameFromLog(logfile)
self.perf_data[task_name] = {}
self.perf_data[task_name]['logfile'] = logfile
logging.debug('Found rt-app logfile for task [%s]', task_name)
else:
            # Find the logfile of each specified rt-app task in the datadir
            for task in tasks:
                logfile = self.__logfileFromTaskName(task)
                self.perf_data[task] = {}
                self.perf_data[task]['logfile'] = logfile
                logging.debug('Found rt-app logfile for task [%s]', task)
        # Load each logfile found into a dataframe
for task in self.perf_data.keys():
logging.debug('Loading dataframe for task [%s]...', task)
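            # Select the per-activation metrics of interest; the column
            # indices below refer to the layout of rt-app per-task logfiles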
            df = pd.read_csv(self.logfile(task),
                    sep=r'\s+',
                    skiprows=1,
                    header=0,
                    usecols=[1,2,3,4,7,8,9,10],
                    names=[
                        'Cycles', 'Run', 'Period', 'Timestamp',
                        'Slack', 'CRun', 'CPeriod', 'WKPLatency'
                    ])
# Normalize time to [s] with origin on the first event
start_time = df['Timestamp'][0]/1e6
df['Time'] = df['Timestamp']/1e6 - start_time
df.set_index(['Time'], inplace=True)
            # Add performance metrics column, performance is defined as:
            #
            #                slack
            #    perf = -------------
            #            period - run
            #
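            # i.e. the measured slack as a fraction of the maximum slack the
            # task could achieve in each activation; a value close to 1.0 is
            # the ideal case, while values around or below 0 mean the
            # activation completed at (or after) its deadline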
df['PerfIndex'] = df['Slack'] / (df['CPeriod'] - df['CRun'])
# Keep track of the loaded dataframe
self.perf_data[task]['df'] = df
def plotPerf(self, task, title=None):
"""
Plot the Latency/Slack and Performance data for the specified task
"""
        # Grid
        gs = gridspec.GridSpec(2, 2, height_ratios=[4,1], width_ratios=[3,1])
        gs.update(wspace=0.1, hspace=0.1)
        # Figure
        plt.figure(figsize=(16, 2*6))
        if title:
            plt.suptitle(title, y=.97, fontsize=16,
                    horizontalalignment='center')
        # Plot: Slack and Latency
        axes = plt.subplot(gs[0,0])
        axes.set_title('Task [{0:s}] (start) Latency and (completion) Slack'\
                .format(task))
        data = self.df(task)[['Slack', 'WKPLatency']]
        data.plot(ax=axes, drawstyle='steps-post', style=['r', 'g'])
        # axes.set_xlim(x_min, x_max)
        axes.xaxis.set_visible(False)
        # Plot: Performance
        axes = plt.subplot(gs[1,0])
        axes.set_title('Task [{0:s}] Performance Index'.format(task))
        data = self.df(task)[['PerfIndex',]]
        data.plot(ax=axes, drawstyle='steps-post')
        axes.set_ylim(0, 2)
        # axes.set_xlim(x_min, x_max)
        # Plot: Slack Histogram
        axes = plt.subplot(gs[0:2,1])
        data = self.df(task)[['PerfIndex',]]
        data.hist(bins=30, ax=axes, alpha=0.4)
        # axes.set_xlim(x_min, x_max)
        pindex_avg = data['PerfIndex'].mean()
        pindex_std = data['PerfIndex'].std()
        logging.info('PerfIndex, Task [%s] avg: %.2f, std: %.2f',
                task, pindex_avg, pindex_std)
        axes.axvline(pindex_avg, color='b', linestyle='--', linewidth=2)
# Save generated plots into datadir
figname = '{}/task_perf_{}.png'.format(self.datadir, task)
pl.savefig(figname, bbox_inches='tight')
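
if __name__ == '__main__':
    # Example usage: assumes the first command line argument is a folder
    # containing rt-app generated logfiles (e.g. rt-app-<task>-0.log)
    logging.basicConfig(level=logging.INFO)
    pa = PerfAnalysis(sys.argv[1])
    for task in pa.tasks():
        logging.info('Task [%s] logfile: %s', task, pa.logfile(task))
        pa.plotPerf(task, title='rt-app performance for task [{}]'.format(task))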