Handle duplicate Index values in data frame
Add a test case that checks that duplicate index values are
handled properly.
Change-Id: Ib04be8b414e16e164e93b8beaa33bffd4b40abe7
Signed-off-by: Kapileshwar Singh <kapileshwar.singh@arm.com>
diff --git a/cr2/plotter/AttrConf.py b/cr2/plotter/AttrConf.py
index f9bf363..dce7c57 100644
--- a/cr2/plotter/AttrConf.py
+++ b/cr2/plotter/AttrConf.py
@@ -19,6 +19,7 @@
CONCAT = False
PIVOT = "__CR2_PIVOT_DEFAULT"
PIVOT_VAL = "__CR2_DEFAULT_PIVOT_VAL"
+DUPLICATE_VALUE_MAX_DELTA = 0.000001
MPL_STYLE = {
'axes.axisbelow': True,
diff --git a/cr2/plotter/Constraint.py b/cr2/plotter/Constraint.py
index 9910cdc..1cab9eb 100644
--- a/cr2/plotter/Constraint.py
+++ b/cr2/plotter/Constraint.py
@@ -57,7 +57,19 @@
self._pivot = pivot
self._column = column
self._template = template
- self.result = self._apply()
+ self._dup_resolved = False
+ self._data = self.populate_data_frame()
+
+ try:
+ self.result = self._apply()
+ except ValueError:
+ if not self._dup_resolved:
+ self._handle_duplicate_index()
+ try:
+ self.result = self._apply()
+ except:
+ raise ValueError("Unable to handle duplicates")
+
self.run_index = run_index
def _apply(self):
@@ -65,7 +77,7 @@
on the input column.
Do we need pivot_val?
"""
- data = self.get_data_frame()
+ data = self._data
result = {}
try:
@@ -93,12 +105,39 @@
criterion = criterion & data[key].map(
lambda x: x in self._filters[key])
values = values[criterion]
+
val_series = values[data[self._pivot] == pivot_val]
result[pivot_val] = val_series
return result
- def get_data_frame(self):
+    def _handle_duplicate_index(self):
+        """Make the index of self._data unique by nudging repeated values.
+
+        The first occurrence of a value is kept; the k-th later one is moved
+        forward by k steps, the step being small enough to stay below the
+        next distinct value and capped at AttrConf.DUPLICATE_VALUE_MAX_DELTA.
+        Relies on searchsorted, so presumably the index is sorted -- confirm.
+        """
+        self._dup_resolved = True
+        index = self._data.index
+        new_index = index.values.copy()  # never edit the live index buffer
+        for dup in index.get_duplicates():
+            first = index.searchsorted(dup, side="left")
+            last = index.searchsorted(dup, side="right") - 1
+            step = AttrConf.DUPLICATE_VALUE_MAX_DELTA
+            if last + 1 < len(index):  # no successor when dup ends the index
+                step = min(step, (index[last + 1] - dup) / (last - first + 1))
+            # Linear offsets; doubling the step could reach the next value
+            for offset, pos in enumerate(range(first + 1, last + 1), 1):
+                new_index[pos] += offset * step
+        relabelled = self._data.copy()
+        # Assign labels; reindex() would look the new values up as old labels
+        relabelled.index = new_index
+        relabelled.index.name = index.name
+        self._data = relabelled
+
+ def populate_data_frame(self):
"""Return the data frame"""
data_container = getattr(
self._cr2_run,
@@ -217,7 +256,6 @@
run_idx,
self._filters))
-
def get_all_pivots(self):
"""Return a union of the pivot values"""
pivot_vals = []
diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py
new file mode 100644
index 0000000..fbdc8da
--- /dev/null
+++ b/tests/test_duplicates.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import unittest
+import matplotlib
+import pandas as pd
+import utils_tests
+import cr2
+import shutil
+
+from test_thermal import BaseTestThermal
+
+
+class TestPlotterDupVals(BaseTestThermal):
+
+    """Test Duplicate Entries in plotter"""
+
+    def _plot_trace(self, trace):
+        """Write trace to trace.txt in the working directory and plot it.
+
+        Builds a cr2.LinePlot of the 'utilization' column of
+        cr2.sched.SchedLoadAvgSchedGroup, pivoted on 'cpus' and filtered to
+        loads 1 and 2. There are no explicit assertions: the test passes
+        when building and viewing the plot raises nothing.
+        """
+        # The with-block closes the file; no explicit close() is needed
+        with open("trace.txt", "w") as fout:
+            fout.write(trace)
+
+        run1 = cr2.Run(name="first")
+        plot = cr2.LinePlot(
+            run1,
+            cr2.sched.SchedLoadAvgSchedGroup,
+            column=['utilization'],
+            filters={"load": [1, 2]},
+            pivot="cpus",
+            marker='o',
+            linestyle='none',
+            per_line=3)
+        plot.view()
+        matplotlib.pyplot.close('all')
+
+    def test_plotter_duplicates(self):
+        """Test that plotter handles duplicates fine"""
+        self._plot_trace("""version = 6
+cpus=6
+rcuos/2-22 [001] 0000.018510: sched_load_avg_sg: cpus=00000001 load=0 utilization=0
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000002 load=1 utilization=1
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018612: sched_load_avg_sg: cpus=00000001 load=2 utilization=3
+rcuos/2-22 [001] 6550.018624: sched_load_avg_sg: cpus=00000002 load=1 utilization=4
+rcuos/2-22 [001] 6550.018625: sched_load_avg_sg: cpus=00000002 load=2 utilization=5
+rcuos/2-22 [001] 6550.018626: sched_load_avg_sg: cpus=00000002 load=3 utilization=6
+rcuos/2-22 [001] 6550.018627: sched_load_avg_sg: cpus=00000002 load=1 utilization=7
+rcuos/2-22 [001] 6550.018628: sched_load_avg_sg: cpus=00000004 load=2 utilization=8\n""")
+
+    def test_plotter_triplicates(self):
+        """Test that plotter handles triplicates fine"""
+        self._plot_trace("""version = 6
+cpus=6
+rcuos/2-22 [001] 0000.018510: sched_load_avg_sg: cpus=00000001 load=0 utilization=0
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000002 load=1 utilization=1
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018612: sched_load_avg_sg: cpus=00000001 load=2 utilization=3
+rcuos/2-22 [001] 6550.018624: sched_load_avg_sg: cpus=00000002 load=1 utilization=4
+rcuos/2-22 [001] 6550.018625: sched_load_avg_sg: cpus=00000002 load=2 utilization=5
+rcuos/2-22 [001] 6550.018626: sched_load_avg_sg: cpus=00000002 load=3 utilization=6
+rcuos/2-22 [001] 6550.018627: sched_load_avg_sg: cpus=00000002 load=1 utilization=7
+rcuos/2-22 [001] 6550.018628: sched_load_avg_sg: cpus=00000004 load=2 utilization=8\n""")