Merge pull request #350 from derkling/new_analysis_features

New analysis features
diff --git a/libs/utils/analysis/frequency_analysis.py b/libs/utils/analysis/frequency_analysis.py
index ad50597..9a617b8 100644
--- a/libs/utils/analysis/frequency_analysis.py
+++ b/libs/utils/analysis/frequency_analysis.py
@@ -27,6 +27,7 @@
 
 from analysis_module import AnalysisModule
 from trace import ResidencyTime, ResidencyData
+from bart.common.Utils import area_under_curve
 
 
 class FrequencyAnalysis(AnalysisModule):
@@ -218,6 +219,119 @@
 
         return (avg_lfreq/1e3, avg_bfreq/1e3)
 
+    def plotCPUFrequencies(self, cpus=None):
+        """
+        Plot frequency for the specified CPUs (or all if not specified).
+        If sched_overutilized events are available, the plots will also show the
+        intervals of time where the system was overutilized.
+
+        The generated plots are also saved as PNG images under the folder
+        specified by the `plots_dir` parameter of :class:`Trace`.
+
+        :param cpus: the list of CPUs to plot, if None it generates a plot
+                     for each available CPU
+        :type cpus: int or list(int)
+
+        :return: a dictionary of average frequency [GHz] for each plotted
+                 CPU, or None when there is nothing to plot.
+        """
+        if not self._trace.hasEvents('cpu_frequency'):
+            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
+            return
+        df = self._dfg_trace_event('cpu_frequency')
+
+        if cpus is None:
+            # Generate plots only for available CPUs
+            cpus = range(df.cpu.max()+1)
+
+        # Disable pandas chained-assignment warnings while rescaling slices
+        chained_assignment = pd.options.mode.chained_assignment
+        pd.options.mode.chained_assignment = None
+
+        freq = {}
+        for cpu_id in listify(cpus):
+            # Extract CPUs' frequencies and scale them to [MHz]
+            _df = df[df.cpu == cpu_id]
+            if _df.empty:
+                self._log.warning('No [cpu_frequency] events for CPU%d, '
+                                  'plot DISABLED!', cpu_id)
+                continue
+            _df['frequency'] = _df.frequency / 1e3
+
+            # Compute AVG frequency for this CPU (left at 0 with one sample)
+            avg_freq = 0
+            if len(_df) > 1:
+                timespan = _df.index[-1] - _df.index[0]
+                avg_freq = area_under_curve(_df['frequency']) / timespan
+
+            # Store DF for plotting
+            freq[cpu_id] = {
+                'df'  : _df,
+                'avg' : avg_freq,
+            }
+
+        pd.options.mode.chained_assignment = chained_assignment
+
+        plots_count = len(freq)
+        if not plots_count:
+            return
+
+        # Setup CPUs plots
+        fig, pltaxes = plt.subplots(len(freq), 1, figsize=(16, 4 * plots_count))
+
+        avg_freqs = {}
+        for plot_idx, cpu_id in enumerate(freq):
+
+            # CPU frequencies and average value
+            _df = freq[cpu_id]['df']
+            _avg = freq[cpu_id]['avg']
+
+            # With a single plot, pltaxes is a bare axes and cannot be indexed
+            try:
+                axes = pltaxes[plot_idx]
+            except TypeError:
+                axes = pltaxes
+            axes.set_title('CPU{:2d} Frequency'.format(cpu_id))
+            # Plot average frequency
+            axes.axhline(_avg, color='r', linestyle='--', linewidth=2)
+
+            # Set plot limit based on CPU min/max frequencies
+            for cluster, members in self._platform['clusters'].iteritems():
+                if cpu_id not in members:
+                    continue
+                axes.set_ylim(
+                        (self._platform['freqs'][cluster][0] - 100000)/1e3,
+                        (self._platform['freqs'][cluster][-1] + 100000)/1e3
+                )
+                break
+
+            # Plot CPU frequency transitions
+            _df['frequency'].plot(style=['r-'], ax=axes,
+                                  drawstyle='steps-post', alpha=0.4)
+
+            # Plot overutilized regions (if signal available)
+            self._trace.analysis.status.plotOverutilized(axes)
+
+            # Finalize plot
+            axes.set_xlim(self._trace.x_min, self._trace.x_max)
+            axes.set_ylabel('MHz')
+            axes.grid(True)
+            if plot_idx + 1 < plots_count:
+                axes.set_xticklabels([])
+                axes.set_xlabel('')
+
+            avg_freqs[cpu_id] = _avg/1e3
+            self._log.info('CPU%02d average frequency: %.3f GHz',
+                           cpu_id, avg_freqs[cpu_id])
+
+        # Save generated plots into datadir
+        figname = '{}/{}cpus_freqs.png'\
+                  .format(self._trace.plots_dir, self._trace.plots_prefix)
+        pl.savefig(figname, bbox_inches='tight')
+
+        return avg_freqs
+
+
     def plotCPUFrequencyResidency(self, cpus=None, pct=False, active=False):
         """
         Plot per-CPU frequency residency. big CPUs are plotted first and then
diff --git a/libs/utils/analysis/tasks_analysis.py b/libs/utils/analysis/tasks_analysis.py
index c76340a..7fc749b 100644
--- a/libs/utils/analysis/tasks_analysis.py
+++ b/libs/utils/analysis/tasks_analysis.py
@@ -200,6 +200,13 @@
         Tasks PELT signals:
                 load_sum, util_sum, period_contrib, sched_overutilized
 
+        At least one of the previous signals must be specified to get a valid
+        plot.
+
+        Additional custom signals can be specified and they will be represented
+        in the "Task signals plots" if they represent valid keys of the task
+        load/utilization trace event (e.g. sched_load_avg_task).
+
         Note:
             sched_overutilized: enable the plotting of overutilization bands on
                                 top of each subplot
@@ -252,13 +259,18 @@
                 # Third plot: tasks's load
                 {'load_sum', 'util_sum', 'period_contrib'}
         ]
-        for signals_to_plot in plots_signals:
+        hr = []
+        ysize = 0
+        for plot_id, signals_to_plot in enumerate(plots_signals):
             signals_to_plot = signals_to_plot.intersection(signals)
             if len(signals_to_plot):
                 plots_count = plots_count + 1
+                # Use bigger size only for the first plot
+                hr.append(3 if plot_id == 0 else 1)
+                ysize = ysize + (8 if plot_id else 4)
 
         # Grid
-        gs = gridspec.GridSpec(plots_count, 1, height_ratios=[2, 1, 1])
+        gs = gridspec.GridSpec(plots_count, 1, height_ratios=hr)
         gs.update(wspace=0.1, hspace=0.1)
 
         # Build list of all PIDs for each task_name to plot
@@ -272,6 +284,8 @@
             pids_to_plot.extend(self._trace.getTaskByName(task))
 
         for tid in pids_to_plot:
+            savefig = False
+
             task_name = self._trace.getTaskByPid(tid)
             if len(task_name) == 1:
                 task_name = task_name[0]
@@ -281,7 +295,7 @@
             plot_id = 0
 
             # For each task create a figure with plots_count plots
-            plt.figure(figsize=(16, 2*6+3))
+            plt.figure(figsize=(16, ysize))
             plt.suptitle('Task Signals',
                          y=.94, fontsize=16, horizontalalignment='center')
 
@@ -294,9 +308,8 @@
                                .format(tid, task_name))
                 plot_id = plot_id + 1
                 is_last = (plot_id == plots_count)
-                if 'sched_overutilized' in signals:
-                    signals_to_plot.append('sched_overutilized')
-                self._plotTaskSignals(axes, tid, signals_to_plot, is_last)
+                self._plotTaskSignals(axes, tid, signals, is_last)
+                savefig = True
 
             # Plot CPUs residency
             signals_to_plot = {'residencies'}
@@ -312,6 +325,7 @@
                 if 'sched_overutilized' in signals:
                     signals_to_plot.append('sched_overutilized')
                 self._plotTaskResidencies(axes, tid, signals_to_plot, is_last)
+                savefig = True
 
             # Plot PELT signals
             signals_to_plot = {'load_sum', 'util_sum', 'period_contrib'}
@@ -324,6 +338,11 @@
                 if 'sched_overutilized' in signals:
                     signals_to_plot.append('sched_overutilized')
                 self._plotTaskPelt(axes, tid, signals_to_plot)
+                savefig = True
+
+            if not savefig:
+                self._log.warning('Nothing to plot for %s', task_name)
+                continue
 
             # Save generated plots into datadir
             if isinstance(task_name, list):
@@ -628,10 +647,12 @@
         util_df = self._dfg_trace_event('sched_load_avg_task')
 
         # Plot load and util
-        signals_to_plot = list({'load_avg', 'util_avg'}.intersection(signals))
-        if len(signals_to_plot):
-            data = util_df[util_df.pid == tid][signals_to_plot]
-            data.plot(ax=axes, drawstyle='steps-post')
+        signals_to_plot = set(signals).difference({'boosted_util'})
+        for signal in signals_to_plot:
+            if signal not in util_df.columns:
+                continue
+            data = util_df[util_df.pid == tid][signal]
+            data.plot(ax=axes, drawstyle='steps-post', legend=True)
 
         # Plot boost utilization if available
         if 'boosted_util' in signals and \
@@ -656,10 +677,10 @@
                 'LITTLE capacity tip/max: %d/%d, big capacity tip/max: %d/%d',
                 tip_lcap, max_lcap, tip_bcap, max_bcap
             )
-            axes.axhline(tip_lcap, color='g', linestyle='--', linewidth=1)
-            axes.axhline(max_lcap, color='g', linestyle='-', linewidth=2)
-            axes.axhline(tip_bcap, color='r', linestyle='--', linewidth=1)
-            axes.axhline(max_bcap, color='r', linestyle='-', linewidth=2)
+            axes.axhline(tip_lcap, color='y', linestyle=':', linewidth=2)
+            axes.axhline(max_lcap, color='y', linestyle='--', linewidth=2)
+            axes.axhline(tip_bcap, color='r', linestyle=':', linewidth=2)
+            axes.axhline(max_bcap, color='r', linestyle='--', linewidth=2)
 
         axes.set_ylim(0, 1100)
         axes.set_xlim(self._trace.x_min, self._trace.x_max)