Handle duplicate Index values in data frame

Add a test case checking that duplicates are handled properly.

Change-Id: Ib04be8b414e16e164e93b8beaa33bffd4b40abe7
Signed-off-by: Kapileshwar Singh <kapileshwar.singh@arm.com>

commit: 77d1677f62f77e337057eb332f856faada52604a [log] [tgz]
author: Kapileshwar Singh <kapileshwar.singh@arm.com> Wed Jan 21 18:07:15 2015 +0000
committer: Javi Merino <javi.merino@arm.com> Wed Aug 12 16:03:45 2015 +0100
tree: 2fbbf5707028e02d767f61a7dd31610773fe764b
parent: 1882075d72faef8e7d14e2eab0623c1f97819bef [diff]
diff --git a/cr2/plotter/AttrConf.py b/cr2/plotter/AttrConf.py
index f9bf363..dce7c57 100644
--- a/cr2/plotter/AttrConf.py
+++ b/cr2/plotter/AttrConf.py

@@ -19,6 +19,7 @@
 CONCAT = False
 PIVOT = "__CR2_PIVOT_DEFAULT"
 PIVOT_VAL = "__CR2_DEFAULT_PIVOT_VAL"
+DUPLICATE_VALUE_MAX_DELTA = 0.000001
 
 MPL_STYLE = {
     'axes.axisbelow': True,

diff --git a/cr2/plotter/Constraint.py b/cr2/plotter/Constraint.py
index 9910cdc..1cab9eb 100644
--- a/cr2/plotter/Constraint.py
+++ b/cr2/plotter/Constraint.py

@@ -57,7 +57,19 @@
         self._pivot = pivot
         self._column = column
         self._template = template
-        self.result = self._apply()
+        self._dup_resolved = False
+        self._data = self.populate_data_frame()
+
+        try:
+            self.result = self._apply()
+        except ValueError:
+            if not self._dup_resolved:
+                self._handle_duplicate_index()
+                try:
+                    self.result = self._apply()
+                except:
+                    raise ValueError("Unable to handle duplicates")
+
         self.run_index = run_index
 
     def _apply(self):
@@ -65,7 +77,7 @@
            on the input column.
            Do we need pivot_val?
         """
-        data = self.get_data_frame()
+        data = self._data
         result = {}
 
         try:
@@ -93,12 +105,39 @@
                     criterion = criterion & data[key].map(
                         lambda x: x in self._filters[key])
                     values = values[criterion]
+
             val_series = values[data[self._pivot] == pivot_val]
             result[pivot_val] = val_series
 
         return result
 
-    def get_data_frame(self):
+    def _handle_duplicate_index(self):
+        """Make the index of self._data unique by nudging repeated values.
+
+        The first entry of each run is kept; later ones get 1x, 2x, ...
+        of a step capped at AttrConf.DUPLICATE_VALUE_MAX_DELTA and sized
+        to stay below the next distinct value. Assumes a sorted index.
+        """
+        self._dup_resolved = True
+        index = self._data.index
+        # Copy: index.values may alias the buffer of the immutable Index
+        new_index = index.values.copy()
+
+        for dup in index.get_duplicates():
+            first = index.searchsorted(dup, side="left")
+            end = index.searchsorted(dup, side="right")
+            delta = AttrConf.DUPLICATE_VALUE_MAX_DELTA
+            # A run at the very end has no successor to bound the step
+            if end < len(index):
+                delta = min(delta, (index[end] - dup) / (end - first))
+
+            # Linear offsets; doubling the step could reach the next value
+            for offset, pos in enumerate(range(first + 1, end), 1):
+                new_index[pos] = dup + offset * delta
+        # Assign the labels; reindex() would look rows up by label instead
+        self._data.index = type(index)(new_index, name=index.name)
+
+    def populate_data_frame(self):
         """Return the data frame"""
         data_container = getattr(
             self._cr2_run,
@@ -217,7 +256,6 @@
                     run_idx,
                     self._filters))
 
-
     def get_all_pivots(self):
         """Return a union of the pivot values"""
         pivot_vals = []

diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py
new file mode 100644
index 0000000..fbdc8da
--- /dev/null
+++ b/tests/test_duplicates.py

@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+import unittest
+import matplotlib
+import pandas as pd
+import utils_tests
+import cr2
+import shutil
+
+from test_thermal import BaseTestThermal
+
+
+class TestPlotterDupVals(BaseTestThermal):
+
+    """Check that plotting copes with repeated timestamps in a trace"""
+
+    def __init__(self, *args, **kwargs):
+        super(TestPlotterDupVals, self).__init__(*args, **kwargs)
+
+    def test_plotter_duplicates(self):
+        """LinePlot.view() must not raise when a timestamp appears twice"""
+        with open("trace.txt", "w") as fout:
+            fout.write("""version = 6
+cpus=6
+rcuos/2-22 [001] 0000.018510: sched_load_avg_sg: cpus=00000001 load=0 utilization=0
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000002 load=1 utilization=1
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018612: sched_load_avg_sg: cpus=00000001 load=2 utilization=3
+rcuos/2-22 [001] 6550.018624: sched_load_avg_sg: cpus=00000002 load=1 utilization=4
+rcuos/2-22 [001] 6550.018625: sched_load_avg_sg: cpus=00000002 load=2 utilization=5
+rcuos/2-22 [001] 6550.018626: sched_load_avg_sg: cpus=00000002 load=3 utilization=6
+rcuos/2-22 [001] 6550.018627: sched_load_avg_sg: cpus=00000002 load=1 utilization=7
+rcuos/2-22 [001] 6550.018628: sched_load_avg_sg: cpus=00000004 load=2 utilization=8\n""")
+            fout.close()
+        run1 = cr2.Run(name="first")
+        l = cr2.LinePlot(
+            run1,
+            cr2.sched.SchedLoadAvgSchedGroup,
+            column=['utilization'],
+            filters={
+                "load": [
+                    1,
+                    2]},
+            pivot="cpus",
+            marker='o',
+            linestyle='none',
+            per_line=3)
+        l.view()
+        matplotlib.pyplot.close('all')
+
+    def test_plotter_triplicates(self):
+
+        """LinePlot.view() must not raise when a timestamp appears thrice"""
+
+        with open("trace.txt", "w") as fout:
+            fout.write("""version = 6
+cpus=6
+rcuos/2-22 [001] 0000.018510: sched_load_avg_sg: cpus=00000001 load=0 utilization=0
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000002 load=1 utilization=1
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018611: sched_load_avg_sg: cpus=00000004 load=2 utilization=2
+rcuos/2-22 [001] 6550.018612: sched_load_avg_sg: cpus=00000001 load=2 utilization=3
+rcuos/2-22 [001] 6550.018624: sched_load_avg_sg: cpus=00000002 load=1 utilization=4
+rcuos/2-22 [001] 6550.018625: sched_load_avg_sg: cpus=00000002 load=2 utilization=5
+rcuos/2-22 [001] 6550.018626: sched_load_avg_sg: cpus=00000002 load=3 utilization=6
+rcuos/2-22 [001] 6550.018627: sched_load_avg_sg: cpus=00000002 load=1 utilization=7
+rcuos/2-22 [001] 6550.018628: sched_load_avg_sg: cpus=00000004 load=2 utilization=8\n""")
+            fout.close()
+
+        run1 = cr2.Run(name="first")
+        l = cr2.LinePlot(
+            run1,
+            cr2.sched.SchedLoadAvgSchedGroup,
+            column=['utilization'],
+            filters={
+                "load": [
+                    1,
+                    2]},
+            pivot="cpus",
+            marker='o',
+            linestyle='none',
+            per_line=3)
+        l.view()
+        matplotlib.pyplot.close('all')
commit	77d1677f62f77e337057eb332f856faada52604a	[log] [tgz]
author	Kapileshwar Singh <kapileshwar.singh@arm.com>	Wed Jan 21 18:07:15 2015 +0000
committer	Javi Merino <javi.merino@arm.com>	Wed Aug 12 16:03:45 2015 +0100
tree	2fbbf5707028e02d767f61a7dd31610773fe764b
parent	1882075d72faef8e7d14e2eab0623c1f97819bef [diff]