Brendan Jackman | e81fdcb | 2017-01-04 17:10:29 +0000 | [diff] [blame] | 1 | # Copyright 2015-2017 ARM Limited |
Javi Merino | b95a4c5 | 2015-11-26 11:51:53 +0000 | [diff] [blame] | 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | # |
| 15 | |
Joel Fernandes | 89ce9a0 | 2017-07-08 13:38:55 -0700 | [diff] [blame] | 16 | import pandas as pd |
| 17 | import numpy as np |
| 18 | |
Javi Merino | b95a4c5 | 2015-11-26 11:51:53 +0000 | [diff] [blame] | 19 | """Generic functions that can be used in multiple places in trappy |
| 20 | """ |
| 21 | |
def listify(to_select):
    """Utility function to normalise single values and lists.

    :param to_select: a single object or a list of objects
    :return: ``to_select`` unchanged if it is already a list,
        otherwise ``[to_select]``
    """
    return to_select if isinstance(to_select, list) else [to_select]
Kapileshwar Singh | 6f3c26c | 2015-12-06 18:23:13 +0000 | [diff] [blame] | 31 | |
def handle_duplicate_index(data,
                           max_delta=0.000001):
    """Handle duplicate values in index

    :param data: The timeseries input
    :type data: :mod:`pandas.Series`

    :param max_delta: Maximum interval adjustment value that
        will be added to duplicate indices
    :type max_delta: float

    Consider the following case where a series needs to be reindexed
    to a new index (which can be required when different series need to
    be combined and compared):
    ::

        import pandas
        values = [0, 1, 2, 3, 4]
        index = [0.0, 1.0, 1.0, 6.0, 7.0]
        series = pandas.Series(values, index=index)
        new_index = [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0]
        series.reindex(new_index)

    The above code fails with:
    ::

        ValueError: cannot reindex from a duplicate axis

    The function :func:`handle_duplicate_index` changes the duplicate values
    to
    ::

        >>> import pandas
        >>> from trappy.utils import handle_duplicate_index

        >>> values = [0, 1, 2, 3, 4]
        >>> index = [0.0, 1.0, 1.0, 6.0, 7.0]
        >>> series = pandas.Series(values, index=index)
        >>> series = handle_duplicate_index(series)
        >>> print(series.index.values)
        [ 0.        1.        1.000001  6.        7.      ]

    """

    index = data.index
    new_index = index.values

    # Unique index values that occur more than once.
    # (Index.get_duplicates() was deprecated and then removed in
    # pandas 1.0; index[index.duplicated()].unique() is the
    # documented replacement.)
    dups = index[index.duplicated()].unique()

    for dup in dups:
        # Leave the first of each run of duplicates intact.
        # searchsorted assumes the index is sorted, as did the
        # original implementation.
        dup_index_left = index.searchsorted(dup, side="left")
        dup_index_right = index.searchsorted(dup, side="right") - 1
        num_dups = dup_index_right - dup_index_left + 1

        # Calculate the delta that needs to be added to each duplicate
        # index so they spread evenly up to the next index value
        try:
            delta = (index[dup_index_right + 1] - dup) / num_dups
        except IndexError:
            # dup_index_right + 1 is outside of the series (i.e. the
            # dup is at the end of the series).
            delta = max_delta

        # Clamp the maximum delta added to max_delta
        if delta > max_delta:
            delta = max_delta

        # Add i * delta to the i-th duplicate.  Linear spacing keeps
        # every adjusted value strictly below dup + num_dups * delta,
        # i.e. below the next index value.  (The previous code doubled
        # delta on every step, so with four or more duplicates the last
        # offset reached the next index value and re-created a
        # duplicate.)
        for i in range(1, num_dups):
            new_index[dup_index_left + i] += i * delta

    return data.reindex(new_index)
Joel Fernandes | 89ce9a0 | 2017-07-08 13:38:55 -0700 | [diff] [blame] | 108 | |
# Iterate fast over all rows in a data frame and apply fn
def apply_callback(df, fn, *kwargs):
    """Call ``fn`` once for every row of ``df``, in order.

    Each row is presented to ``fn`` as a dict mapping column name to
    value, with the row label stored under the key ``'Time'``.

    :param df: the data frame whose rows are visited
    :type df: :mod:`pandas.DataFrame`

    :param fn: callback invoked as ``fn(row_dict)``, or as
        ``fn(row_dict, extra_args_tuple)`` when extra positional
        arguments were supplied

    :param kwargs: optional extra arguments forwarded to ``fn`` as a
        single tuple (the name is kept for backward compatibility;
        these are positional arguments, not keyword arguments)
    """
    # Column names beginning with underscore will not be preserved in tuples
    # due to constraints on namedtuple field names, so store mappings from
    # column name to column number for each trace event.  Position 0 of
    # each tuple from itertuples() is the row label, exposed as 'Time'.
    col_idxs = {name: idx
                for idx, name in enumerate(['Time'] + df.columns.tolist())}

    iters = df.itertuples()
    # next(it, None) instead of it.next(): works on Python 3 and
    # handles an empty frame gracefully instead of raising
    event_tuple = next(iters, None)

    while event_tuple is not None:
        event_dict = {col: event_tuple[idx] for col, idx in col_idxs.items()}

        if kwargs:
            fn(event_dict, kwargs)
        else:
            fn(event_dict)

        event_tuple = next(iters, None)
| 131 | |
def merge_dfs(pr_df, sec_df, pivot):
    """Merge a primary and a secondary data frame row by row.

    Rows of both frames are interleaved in ``__line`` order.  Each
    primary row has its NaN columns filled in from the most recent
    secondary row that shares the same ``pivot`` value; secondary rows
    only update that per-pivot state and do not appear in the output.

    :param pr_df: the primary data frame; its (filled-in) rows form the
        result.  Must contain a ``__line`` column (assumed by the sort
        below — TODO confirm against callers).
    :type pr_df: :mod:`pandas.DataFrame`

    :param sec_df: the secondary data frame providing fill-in values
    :type sec_df: :mod:`pandas.DataFrame`

    :param pivot: name of the column used to match primary rows with
        secondary rows

    :return: a new data frame of the merged primary rows, indexed by
        ``Time``
    """
    # Keep track of the last secondary event seen for each pivot value
    pivot_map = {}

    # An array accumulating dicts with merged data
    merged_data = []

    def df_fn(data):
        # pd.concat(..., keys=[...]) below labels every row with a
        # ('primary'|'secondary', time) tuple in data['Time']
        if data['Time'][0] == 'secondary':
            # Store the latest secondary info
            pivot_map[data[pivot]] = data
            # Get rid of primary/secondary labels
            data['Time'] = data['Time'][1]
            return

        # Propagate latest secondary info into the primary row
        for key, value in data.items():
            if key == pivot:
                continue
            # value != value is a fast NaN check (NaN is the only value
            # not equal to itself; faster than np.isnan + try/except).
            # dict.has_key() was removed in Python 3 — use "in".
            if value != value and data[pivot] in pivot_map:
                data[key] = pivot_map[data[pivot]][key]

        # Get rid of primary/secondary labels
        data['Time'] = data['Time'][1]
        merged_data.append(data)

    df = pd.concat([pr_df, sec_df],
                   keys=['primary', 'secondary']).sort_values(by='__line')
    apply_callback(df, df_fn)
    merged_df = pd.DataFrame.from_dict(merged_data)
    merged_df.set_index('Time', inplace=True)

    return merged_df