libs/utils/analysis: handle events with duplicate timestamps
It might happen that a trace has two (or more) events with the same timestamp.
Pandas does not allow reindexing an index that contains duplicate labels.
For this reason, the cluster active signal is no longer built by creating a
dataframe from a dictionary of series; instead, we perform an outer join between
the dataframes of the per-CPU active signals.
Also, frequency residency computation does not perform reindexing anymore but
rather a join operation. As a consequence, because series data structures do not
support join, it is necessary to undo what 37a7192 does and keep `cluster_freqs`
as a dataframe instead.
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/libs/utils/analysis/frequency_analysis.py b/libs/utils/analysis/frequency_analysis.py
index 395e2ab..ddf098f 100644
--- a/libs/utils/analysis/frequency_analysis.py
+++ b/libs/utils/analysis/frequency_analysis.py
@@ -368,13 +368,13 @@
logging.warn('Cluster frequency is NOT coherent,'
'cannot compute residency!')
return None
- cluster_freqs = freq_df[freq_df.cpu == _cluster[0]].frequency
+ cluster_freqs = freq_df[freq_df.cpu == _cluster[0]]
# Compute TOTAL Time
time_intervals = cluster_freqs.index[1:] - cluster_freqs.index[:-1]
total_time = pd.DataFrame({
'time': time_intervals,
- 'frequency': [f/1000.0 for f in cluster_freqs.iloc[:-1]]
+ 'frequency': [f/1000.0 for f in cluster_freqs.iloc[:-1].frequency]
})
total_time = total_time.groupby(['frequency']).sum()
@@ -390,15 +390,14 @@
# - freq_active, square wave of the form:
# freq_active[t] == 1 if at time t the frequency is f
# freq_active[t] == 0 otherwise
- available_freqs = sorted(cluster_freqs.unique())
- new_idx = sorted(cluster_freqs.index.tolist() +
- cluster_active.index.tolist())
- cluster_freqs = cluster_freqs.reindex(new_idx, method='ffill')
- cluster_active = cluster_active.reindex(new_idx, method='ffill')
+ available_freqs = sorted(cluster_freqs.frequency.unique())
+ cluster_freqs = cluster_freqs.join(
+ cluster_active.to_frame(name='active'), how='outer')
+ cluster_freqs.fillna(method='ffill', inplace=True)
nonidle_time = []
for f in available_freqs:
- freq_active = cluster_freqs.apply(lambda x: 1 if x == f else 0)
- active_t = cluster_active * freq_active
+ freq_active = cluster_freqs.frequency.apply(lambda x: 1 if x == f else 0)
+ active_t = cluster_freqs.active * freq_active
# Compute total time by integrating the square wave
nonidle_time.append(self._trace.integrate_square_wave(active_t))
diff --git a/libs/utils/trace.py b/libs/utils/trace.py
index 1c4440e..e1a94cb 100644
--- a/libs/utils/trace.py
+++ b/libs/utils/trace.py
@@ -788,11 +788,13 @@
'cannot compute cluster active signal!')
return None
- cpu_active = {}
- for cpu in cluster:
- cpu_active[cpu] = self.getCPUActiveSignal(cpu)
+ active = self.getCPUActiveSignal(cluster[0]).to_frame(name=cluster[0])
+ for cpu in cluster[1:]:
+ active = active.join(
+ self.getCPUActiveSignal(cpu).to_frame(name=cpu),
+ how='outer'
+ )
- active = pd.DataFrame(cpu_active)
active.fillna(method='ffill', inplace=True)
# Cluster active is the OR between the actives on each CPU