Blame - user_activity_benchmarks/utils.py - platform/external/toolchain-utils

blob: 009b241a537db9160d7832859acaa4b20bdb490a [file] [log] [blame]

Evelina Dumitrescu	c7faa09	2016-09-28 15:13:29 -0700	[diff] [blame]	1	# Copyright 2016 The Chromium OS Authors. All rights reserved.
				2	# Use of this source code is governed by a BSD-style license that can be
				3	# found in the LICENSE file.
				4	"""Utility functions for parsing pprof, CWP data and Chrome OS groups files."""
				5
				6	from collections import defaultdict
				7
				8	import csv
				9	import os
				10	import re
				11
				12	SEPARATOR_REGEX = re.compile(r'-+\+-+')
				13	FUNCTION_STATISTIC_REGEX = \
				14	re.compile(r'(\S+)\s+(\S+)%\s+(\S+)%\s+(\S+)\s+(\S+)%')
				15	CHILD_FUNCTION_PERCENTAGE_REGEX = re.compile(r'([0-9.]+)%')
				16	FUNCTION_KEY_SEPARATOR_REGEX = re.compile(r'\\|\s+')
				17	# Constants used to identify if a function is common in the pprof and CWP
				18	# files.
				19	COMMON_FUNCTION = 'common'
				20	EXTRA_FUNCTION = 'extra'
				21	PARENT_CHILD_FUNCTIONS_SEPARATOR = ';;'
				22	# List of pairs of strings used for make substitutions in file names to make
				23	# CWP and pprof data consistent.
Evelina Dumitrescu	63dce85	2016-10-19 18:13:34 -0700	[diff] [blame]	24	FILE_NAME_REPLACING_PAIR_STRINGS = [('gnawty', 'BOARD'),
				25	('amd64-generic', 'BOARD'),
Evelina Dumitrescu	c7faa09	2016-09-28 15:13:29 -0700	[diff] [blame]	26	(' ../sysdeps', ',sysdeps'),
				27	(' ../nptl', ',nptl'),
				28	(' aes-x86_64.s', ',aes-x86_64.s'),
				29	(' (inline)', ''),
				30	(' (partial-inline)', ''),
				31	(' ../', ','),
				32	('../', '')]
				33	# Separator used to delimit the function from the file name.
				34	FUNCTION_FILE_SEPARATOR = ' /'
				35
				36
				37	def MakeCWPAndPprofFileNamesConsistent(file_name):
				38	"""Makes the CWP and pprof file names consistent.
				39
				40	For the same function, it may happen for some file paths to differ slightly
				41	in the CWP data compared to the pprof output. In a file name, for each tuple
				42	element of the list, we substitute the first element with the second one.
				43
				44	Args:
				45	file_name: A string representing the name of the file.
				46
				47	Returns:
				48	A string representing the modified name of tihe file.
				49	"""
				50	file_name = file_name.replace(', ', '; ')
				51	for replacing_pair_string in FILE_NAME_REPLACING_PAIR_STRINGS:
				52	file_name = file_name.replace(replacing_pair_string[0],
				53	replacing_pair_string[1])
				54
				55	return file_name
				56
				57	def MakePprofFunctionKey(function_and_file_name):
				58	"""Creates the function key from the function and file name.
				59
				60	Parsing the the pprof --top and --tree outputs is difficult due to the fact
				61	that it hard to extract the function and file name (i.e the function names
				62	can have a lot of unexpected charachters such as spaces, operators etc).
				63	For the moment, we used FUNCTION_FILE_SEPARATOR as delimiter between the
				64	function and the file name. However, there are some cases where the file name
				65	does not start with / and we treat this cases separately (i.e ../sysdeps,
				66	../nptl, aes-x86_64.s).
				67
				68	Args:
				69	function_and_file_name: A string representing the function and the file name
				70	as it appears in the pprof output.
				71
				72	Returns:
				73	A string representing the function key, composed from the function and file
				74	name, comma separated.
				75	"""
				76	# TODO(evelinad): Use pprof --topproto instead of pprof --top to parse
				77	# protobuffers instead of text output. Investigate if there is an equivalent
				78	# for pprof --tree that gives protobuffer output.
				79	#
				80	# In the CWP output, we replace the , with ; as a workaround for parsing
				81	# csv files. We do the same for the pprof output.
				82	#
				83	# TODO(evelinad): Use dremel --csv_dialect=excel-tab in the queries for
				84	# replacing the , delimiter with tab.
				85	function_and_file_name = function_and_file_name.replace(', ', '; ')
				86	# If the function and file name sequence contains the FUNCTION_FILE_SEPARATOR,
				87	# we normalize the path name of the file and make the string subtitutions
				88	# to make the CWP and pprof data consistent. The returned key is composed
				89	# from the function name and normalized file path name, separated by a comma.
				90	# If the function and file name does not contain the FUNCTION_FILE_SEPARATOR,
				91	# we just do the strings substitution.
				92	if FUNCTION_FILE_SEPARATOR in function_and_file_name:
				93	function_name, file_name = \
				94	function_and_file_name.split(FUNCTION_FILE_SEPARATOR)
				95	file_name = \
				96	MakeCWPAndPprofFileNamesConsistent(os.path.normpath("/" + file_name))
				97	return ','.join([function_name, file_name])
				98
				99	return MakeCWPAndPprofFileNamesConsistent(function_and_file_name)
				100
Evelina Dumitrescu	731ad07	2016-10-20 16:05:38 -0700	[diff] [blame]	101
				102	def ComputeCWPCummulativeInclusiveStatistics(cwp_inclusive_count_statistics):
				103	"""Computes the cumulative inclusive count value of a function.
				104
				105	A function might appear declared in multiple files or objects. When
				106	computing the fraction of the inclusive count value from a child function to
				107	the parent function, we take into consideration the sum of the
				108	inclusive_count
				109	count values from all the ocurences of that function.
				110
				111	Args:
				112	cwp_inclusive_count_statistics: A dict containing the inclusive count
				113	statistics extracted by the ParseCWPInclusiveCountFile method.
				114
				115	Returns:
				116	A dict having as a ket the name of the function and as a value the sum of
				117	the inclusive count values of the occurences of the functions from all
				118	the files and objects.
				119	"""
				120	cwp_inclusive_count_statistics_cumulative = defaultdict(int)
				121
				122	for function_key, function_statistics \
				123	in cwp_inclusive_count_statistics.iteritems():
				124	function_name, _ = function_key.split(',')
				125	cwp_inclusive_count_statistics_cumulative[function_name] += \
				126	function_statistics[1]
				127
				128	return cwp_inclusive_count_statistics_cumulative
				129
				130	def ComputeCWPChildFunctionsFractions(cwp_inclusive_count_statistics_cumulative,
				131	cwp_pairwise_inclusive_count_statistics):
				132	"""Computes the fractions of the inclusive count values for child functions.
				133
				134	The fraction represents the inclusive count value of a child function over
				135	the one of the parent function.
				136
				137	Args:
				138	cwp_inclusive_count_statistics_cumulative: A dict containing the
				139	cumulative inclusive count values of the CWP functions.
				140	cwp_pairwise_inclusive_count_statistics: A dict containing the inclusive
				141	count statistics for pairs of parent and child functions. The key is the
				142	parent function. The value is a dict with the key the name of the child
				143	function and the file name, comma separated, and the value is the
				144	inclusive count value of the pair of parent and child functions.
				145
				146	Returns:
				147	A dict containing the inclusive count statistics for pairs of parent
				148	and child functions. The key is the parent function. The value is a
				149	dict with the key the name of the child function and the file name,
				150	comma separated, and the value is the inclusive count fraction of the
				151	child function out of the parent function.
				152	"""
				153
				154	pairwise_inclusive_count_fractions = {}
				155
				156	for parent_function_key, child_functions_metrics in \
				157	cwp_pairwise_inclusive_count_statistics.iteritems():
				158	child_functions_fractions = {}
				159	parent_function_inclusive_count = \
				160	cwp_inclusive_count_statistics_cumulative.get(parent_function_key, 0.0)
				161
				162	if parent_function_key in cwp_inclusive_count_statistics_cumulative:
				163	for child_function_key, child_function_inclusive_count \
				164	in child_functions_metrics.iteritems():
				165	child_functions_fractions[child_function_key] = \
				166	child_function_inclusive_count / parent_function_inclusive_count
				167	else:
				168	for child_function_key, child_function_inclusive_count \
				169	in child_functions_metrics.iteritems():
				170	child_functions_fractions[child_function_key] = 0.0
				171	pairwise_inclusive_count_fractions[parent_function_key] = \
				172	child_functions_fractions
				173
				174	return pairwise_inclusive_count_fractions
				175
def ParseFunctionGroups(cwp_function_groups_lines):
  """Parses the contents of the function groups file.

  Args:
    cwp_function_groups_lines: A list of the lines contained in the CWP
      function groups file. A line contains the group name and the file path
      that describes the group, separated by whitespace. Blank lines are
      ignored.

  Returns:
    A list of tuples containing the group name and the file path, in the same
    order as the input lines.
  """
  # The order of the groups mentioned in the cwp_function_groups file
  # matters. A function declared in a file will belong to the first
  # mentioned group that matches its path to the one of the file.
  # It is possible to have multiple paths that belong to the same group.
  #
  # Whitespace-only lines are skipped: splitting them would yield an empty
  # tuple, which is not a (group name, file path) pair.
  return [
      tuple(line.split())
      for line in cwp_function_groups_lines
      if line.strip()
  ]
				192
				193
				194	def ParsePprofTopOutput(file_name):
				195	"""Parses a file that contains the output of the pprof --top command.
				196
				197	Args:
				198	file_name: The name of the file containing the pprof --top output.
				199
				200	Returns:
				201	A dict having as a key the name of the function and the file containing
				202	the declaration of the function, separated by a comma, and as a value
				203	a tuple containing the flat, flat percentage, sum percentage, cummulative
				204	and cummulative percentage values.
				205	"""
				206
				207	pprof_top_statistics = {}
				208
				209	# In the pprof top output, the statistics of the functions start from the
				210	# 6th line.
				211	with open(file_name) as input_file:
				212	pprof_top_content = input_file.readlines()[6:]
				213
				214	for line in pprof_top_content:
				215	function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
				216	flat, flat_p, sum_p, cum, cum_p = function_statistic_match.groups()
				217	flat_p = str(float(flat_p) / 100.0)
				218	sum_p = str(float(sum_p) / 100.0)
				219	cum_p = str(float(cum_p) / 100.0)
				220	lookup_index = function_statistic_match.end()
				221	function_and_file_name = line[lookup_index + 2 : -1]
				222	key = MakePprofFunctionKey(function_and_file_name)
				223	pprof_top_statistics[key] = (flat, flat_p, sum_p, cum, cum_p)
				224	return pprof_top_statistics
				225
				226
				227	def ParsePprofTreeOutput(file_name):
				228	"""Parses a file that contains the output of the pprof --tree command.
				229
				230	Args:
				231	file_name: The name of the file containing the pprof --tree output.
				232
				233	Returns:
				234	A dict including the statistics for pairs of parent and child functions.
				235	The key is the name of the parent function and the file where the
				236	function is declared, separated by a comma. The value is a dict having as
				237	a key the name of the child function and the file where the function is
				238	delcared, comma separated and as a value the percentage of time the
				239	parent function spends in the child function.
				240	"""
				241
				242	# In the pprof output, the statistics of the functions start from the 9th
				243	# line.
				244	with open(file_name) as input_file:
				245	pprof_tree_content = input_file.readlines()[9:]
				246
				247	pprof_tree_statistics = defaultdict(lambda: defaultdict(float))
				248	track_child_functions = False
				249
				250	# The statistics of a given function, its parent and child functions are
				251	# included between two separator marks.
				252	# All the parent function statistics are above the line containing the
				253	# statistics of the given function.
				254	# All the statistics of a child function are below the statistics of the
				255	# given function.
				256	# The statistics of a parent or a child function contain the calls, calls
				257	# percentage, the function name and the file where the function is declared.
				258	# The statistics of the given function contain the flat, flat percentage,
				259	# sum percentage, cummulative, cummulative percentage, function name and the
				260	# name of the file containing the declaration of the function.
				261	for line in pprof_tree_content:
				262	separator_match = SEPARATOR_REGEX.search(line)
				263
				264	if separator_match:
				265	track_child_functions = False
				266	continue
				267
				268	parent_function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
				269
				270	if parent_function_statistic_match:
				271	track_child_functions = True
				272	lookup_index = parent_function_statistic_match.end()
				273	parent_function_key_match = \
				274	FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
				275	lookup_index = parent_function_key_match.end()
				276	parent_function_key = MakePprofFunctionKey(line[lookup_index:-1])
				277	continue
				278
				279	if not track_child_functions:
				280	continue
				281
				282	child_function_statistic_match = \
				283	CHILD_FUNCTION_PERCENTAGE_REGEX.search(line)
				284	child_function_percentage = \
				285	float(child_function_statistic_match.group(1))
				286	lookup_index = child_function_statistic_match.end()
				287	child_function_key_match = \
				288	FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
				289	lookup_index = child_function_key_match.end()
				290	child_function_key = MakePprofFunctionKey(line[lookup_index:-1])
				291
				292	pprof_tree_statistics[parent_function_key][child_function_key] += \
				293	child_function_percentage / 100.0
				294
				295	return pprof_tree_statistics
				296
				297
				298	def ParseCWPInclusiveCountFile(file_name):
				299	"""Parses the CWP inclusive count files.
				300
				301	A line should contain the name of the function, the file name with the
				302	declaration, the inclusive count and inclusive count fraction out of the
				303	total extracted inclusive count values.
				304
				305	Args:
				306	file_name: The file containing the inclusive count values of the CWP
				307	functions.
				308
				309	Returns:
				310	A dict containing the inclusive count statistics. The key is the name of
				311	the function and the file name, comma separated. The value represents a
				312	tuple with the object name containing the function declaration, the
				313	inclusive count and inclusive count fraction values, and a marker to
				314	identify if the function is present in one of the benchmark profiles.
				315	"""
				316	cwp_inclusive_count_statistics = defaultdict(lambda: ('', 0, 0.0, 0))
				317
				318	with open(file_name) as input_file:
				319	statistics_reader = csv.DictReader(input_file, delimiter=',')
				320	for statistic in statistics_reader:
				321	function_name = statistic['function']
				322	file_name = MakeCWPAndPprofFileNamesConsistent(
				323	os.path.normpath(statistic['file']))
				324	dso_name = statistic['dso']
				325	inclusive_count = statistic['inclusive_count']
				326	inclusive_count_fraction = statistic['inclusive_count_fraction']
				327
				328	# We ignore the lines that have empty fields(i.e they specify only the
				329	# addresses of the functions and the inclusive counts values).
				330	if all([
				331	function_name, file_name, dso_name, inclusive_count,
				332	inclusive_count_fraction
				333	]):
				334	key = '%s,%s' % (function_name, file_name)
				335
				336	# There might be situations where a function appears in multiple files
				337	# or objects. Such situations can occur when in the Dremel queries there
				338	# are not specified the Chrome OS version and the name of the board (i.e
				339	# the files can belong to different kernel or library versions).
				340	inclusive_count_sum = \
				341	cwp_inclusive_count_statistics[key][1] + int(inclusive_count)
				342	inclusive_count_fraction_sum = \
				343	cwp_inclusive_count_statistics[key][2] + \
				344	float(inclusive_count_fraction)
				345
				346	# All the functions are initially marked as EXTRA_FUNCTION.
				347	value = \
				348	(dso_name, inclusive_count_sum, inclusive_count_fraction_sum,
				349	EXTRA_FUNCTION)
				350	cwp_inclusive_count_statistics[key] = value
				351
				352	return cwp_inclusive_count_statistics
				353
				354
				355	def ParseCWPPairwiseInclusiveCountFile(file_name):
				356	"""Parses the CWP pairwise inclusive count files.
				357
				358	A line of the file should contain a pair of a parent and a child function,
				359	concatenated by the PARENT_CHILD_FUNCTIONS_SEPARATOR, the name of the file
				360	where the child function is declared and the inclusive count fractions of
				361	the pair of functions out of the total amount of inclusive count values.
				362
				363	Args:
				364	file_name: The file containing the pairwise inclusive_count statistics of
				365	the
				366	CWP functions.
				367
				368	Returns:
				369	A dict containing the statistics of the parent functions and each of
				370	their child functions. The key of the dict is the name of the parent
				371	function. The value is a dict having as a key the name of the child
				372	function with its file name separated by a ',' and as a value the
				373	inclusive count value of the parent-child function pair.
				374	"""
				375	pairwise_inclusive_count_statistics = defaultdict(lambda: defaultdict(float))
				376
				377	with open(file_name) as input_file:
				378	statistics_reader = csv.DictReader(input_file, delimiter=',')
				379
				380	for statistic in statistics_reader:
				381	parent_function_name, child_function_name = \
				382	statistic['parent_child_functions'].split(
				383	PARENT_CHILD_FUNCTIONS_SEPARATOR)
				384	child_function_file_name = MakeCWPAndPprofFileNamesConsistent(
				385	os.path.normpath(statistic['child_function_file']))
				386	inclusive_count = statistic['inclusive_count']
				387
				388	# There might be situations where a child function appears in
				389	# multiple files or objects. Such situations can occur when in the
				390	# Dremel queries are not specified the Chrome OS version and the
				391	# name of the board (i.e the files can belong to different kernel or
				392	# library versions), when the child function is a template function
				393	# that is declared in a header file or there are name collisions
				394	# between multiple executable objects.
				395	# If a pair of child and parent functions appears multiple times, we
				396	# add their inclusive count values.
				397	child_function_key = ','.join(
				398	[child_function_name, child_function_file_name])
				399	pairwise_inclusive_count_statistics[parent_function_name] \
				400	[child_function_key] += float(inclusive_count)
				401
				402	return pairwise_inclusive_count_statistics