Blame - user_activity_benchmarks/utils.py - platform/external/toolchain-utils

blob: ead56df68504f31755f5a9bebd96928e847603ce [file] [log] [blame]

Evelina Dumitrescu	c7faa09	2016-09-28 15:13:29 -0700	[diff] [blame^]	1	# Copyright 2016 The Chromium OS Authors. All rights reserved.
				2	# Use of this source code is governed by a BSD-style license that can be
				3	# found in the LICENSE file.
				4	"""Utility functions for parsing pprof, CWP data and Chrome OS groups files."""
				5
				6	from collections import defaultdict
				7
				8	import csv
				9	import os
				10	import re
				11
				12	SEPARATOR_REGEX = re.compile(r'-+\+-+')
				13	FUNCTION_STATISTIC_REGEX = \
				14	re.compile(r'(\S+)\s+(\S+)%\s+(\S+)%\s+(\S+)\s+(\S+)%')
				15	CHILD_FUNCTION_PERCENTAGE_REGEX = re.compile(r'([0-9.]+)%')
				16	FUNCTION_KEY_SEPARATOR_REGEX = re.compile(r'\\|\s+')
				17	# Constants used to identify if a function is common in the pprof and CWP
				18	# files.
				19	COMMON_FUNCTION = 'common'
				20	EXTRA_FUNCTION = 'extra'
				21	PARENT_CHILD_FUNCTIONS_SEPARATOR = ';;'
				22	# List of pairs of strings used for make substitutions in file names to make
				23	# CWP and pprof data consistent.
				24	FILE_NAME_REPLACING_PAIR_STRINGS = [('/build/gnawty', '/build/BOARD'),
				25	('/build/amd64-generic', '/build/BOARD'),
				26	(' ../sysdeps', ',sysdeps'),
				27	(' ../nptl', ',nptl'),
				28	(' aes-x86_64.s', ',aes-x86_64.s'),
				29	(' (inline)', ''),
				30	(' (partial-inline)', ''),
				31	(' ../', ','),
				32	('../', '')]
				33	# Separator used to delimit the function from the file name.
				34	FUNCTION_FILE_SEPARATOR = ' /'
				35
				36
				37	def MakeCWPAndPprofFileNamesConsistent(file_name):
				38	"""Makes the CWP and pprof file names consistent.
				39
				40	For the same function, it may happen for some file paths to differ slightly
				41	in the CWP data compared to the pprof output. In a file name, for each tuple
				42	element of the list, we substitute the first element with the second one.
				43
				44	Args:
				45	file_name: A string representing the name of the file.
				46
				47	Returns:
				48	A string representing the modified name of tihe file.
				49	"""
				50	file_name = file_name.replace(', ', '; ')
				51	for replacing_pair_string in FILE_NAME_REPLACING_PAIR_STRINGS:
				52	file_name = file_name.replace(replacing_pair_string[0],
				53	replacing_pair_string[1])
				54
				55	return file_name
				56
				57	def MakePprofFunctionKey(function_and_file_name):
				58	"""Creates the function key from the function and file name.
				59
				60	Parsing the the pprof --top and --tree outputs is difficult due to the fact
				61	that it hard to extract the function and file name (i.e the function names
				62	can have a lot of unexpected charachters such as spaces, operators etc).
				63	For the moment, we used FUNCTION_FILE_SEPARATOR as delimiter between the
				64	function and the file name. However, there are some cases where the file name
				65	does not start with / and we treat this cases separately (i.e ../sysdeps,
				66	../nptl, aes-x86_64.s).
				67
				68	Args:
				69	function_and_file_name: A string representing the function and the file name
				70	as it appears in the pprof output.
				71
				72	Returns:
				73	A string representing the function key, composed from the function and file
				74	name, comma separated.
				75	"""
				76	# TODO(evelinad): Use pprof --topproto instead of pprof --top to parse
				77	# protobuffers instead of text output. Investigate if there is an equivalent
				78	# for pprof --tree that gives protobuffer output.
				79	#
				80	# In the CWP output, we replace the , with ; as a workaround for parsing
				81	# csv files. We do the same for the pprof output.
				82	#
				83	# TODO(evelinad): Use dremel --csv_dialect=excel-tab in the queries for
				84	# replacing the , delimiter with tab.
				85	function_and_file_name = function_and_file_name.replace(', ', '; ')
				86	# If the function and file name sequence contains the FUNCTION_FILE_SEPARATOR,
				87	# we normalize the path name of the file and make the string subtitutions
				88	# to make the CWP and pprof data consistent. The returned key is composed
				89	# from the function name and normalized file path name, separated by a comma.
				90	# If the function and file name does not contain the FUNCTION_FILE_SEPARATOR,
				91	# we just do the strings substitution.
				92	if FUNCTION_FILE_SEPARATOR in function_and_file_name:
				93	function_name, file_name = \
				94	function_and_file_name.split(FUNCTION_FILE_SEPARATOR)
				95	file_name = \
				96	MakeCWPAndPprofFileNamesConsistent(os.path.normpath("/" + file_name))
				97	return ','.join([function_name, file_name])
				98
				99	return MakeCWPAndPprofFileNamesConsistent(function_and_file_name)
				100
				101	def ParseFunctionGroups(cwp_function_groups_lines):
				102	"""Parses the contents of the function groups file.
				103
				104	Args:
				105	cwp_function_groups_lines: A list of the lines contained in the CWP
				106	function groups file. A line contains the group name and the file path
				107	that describes the group, separated by a space.
				108
				109	Returns:
				110	A list of tuples containing the group name and the file path.
				111	"""
				112	# The order of the groups mentioned in the cwp_function_groups file
				113	# matters. A function declared in a file will belong to the first
				114	# mentioned group that matches its path to the one of the file.
				115	# It is possible to have multiple paths that belong to the same group.
				116	return [tuple(line.split()) for line in cwp_function_groups_lines]
				117
				118
				119	def ParsePprofTopOutput(file_name):
				120	"""Parses a file that contains the output of the pprof --top command.
				121
				122	Args:
				123	file_name: The name of the file containing the pprof --top output.
				124
				125	Returns:
				126	A dict having as a key the name of the function and the file containing
				127	the declaration of the function, separated by a comma, and as a value
				128	a tuple containing the flat, flat percentage, sum percentage, cummulative
				129	and cummulative percentage values.
				130	"""
				131
				132	pprof_top_statistics = {}
				133
				134	# In the pprof top output, the statistics of the functions start from the
				135	# 6th line.
				136	with open(file_name) as input_file:
				137	pprof_top_content = input_file.readlines()[6:]
				138
				139	for line in pprof_top_content:
				140	function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
				141	flat, flat_p, sum_p, cum, cum_p = function_statistic_match.groups()
				142	flat_p = str(float(flat_p) / 100.0)
				143	sum_p = str(float(sum_p) / 100.0)
				144	cum_p = str(float(cum_p) / 100.0)
				145	lookup_index = function_statistic_match.end()
				146	function_and_file_name = line[lookup_index + 2 : -1]
				147	key = MakePprofFunctionKey(function_and_file_name)
				148	pprof_top_statistics[key] = (flat, flat_p, sum_p, cum, cum_p)
				149	return pprof_top_statistics
				150
				151
				152	def ParsePprofTreeOutput(file_name):
				153	"""Parses a file that contains the output of the pprof --tree command.
				154
				155	Args:
				156	file_name: The name of the file containing the pprof --tree output.
				157
				158	Returns:
				159	A dict including the statistics for pairs of parent and child functions.
				160	The key is the name of the parent function and the file where the
				161	function is declared, separated by a comma. The value is a dict having as
				162	a key the name of the child function and the file where the function is
				163	delcared, comma separated and as a value the percentage of time the
				164	parent function spends in the child function.
				165	"""
				166
				167	# In the pprof output, the statistics of the functions start from the 9th
				168	# line.
				169	with open(file_name) as input_file:
				170	pprof_tree_content = input_file.readlines()[9:]
				171
				172	pprof_tree_statistics = defaultdict(lambda: defaultdict(float))
				173	track_child_functions = False
				174
				175	# The statistics of a given function, its parent and child functions are
				176	# included between two separator marks.
				177	# All the parent function statistics are above the line containing the
				178	# statistics of the given function.
				179	# All the statistics of a child function are below the statistics of the
				180	# given function.
				181	# The statistics of a parent or a child function contain the calls, calls
				182	# percentage, the function name and the file where the function is declared.
				183	# The statistics of the given function contain the flat, flat percentage,
				184	# sum percentage, cummulative, cummulative percentage, function name and the
				185	# name of the file containing the declaration of the function.
				186	for line in pprof_tree_content:
				187	separator_match = SEPARATOR_REGEX.search(line)
				188
				189	if separator_match:
				190	track_child_functions = False
				191	continue
				192
				193	parent_function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)
				194
				195	if parent_function_statistic_match:
				196	track_child_functions = True
				197	lookup_index = parent_function_statistic_match.end()
				198	parent_function_key_match = \
				199	FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
				200	lookup_index = parent_function_key_match.end()
				201	parent_function_key = MakePprofFunctionKey(line[lookup_index:-1])
				202	continue
				203
				204	if not track_child_functions:
				205	continue
				206
				207	child_function_statistic_match = \
				208	CHILD_FUNCTION_PERCENTAGE_REGEX.search(line)
				209	child_function_percentage = \
				210	float(child_function_statistic_match.group(1))
				211	lookup_index = child_function_statistic_match.end()
				212	child_function_key_match = \
				213	FUNCTION_KEY_SEPARATOR_REGEX.search(line, pos=lookup_index)
				214	lookup_index = child_function_key_match.end()
				215	child_function_key = MakePprofFunctionKey(line[lookup_index:-1])
				216
				217	pprof_tree_statistics[parent_function_key][child_function_key] += \
				218	child_function_percentage / 100.0
				219
				220	return pprof_tree_statistics
				221
				222
				223	def ParseCWPInclusiveCountFile(file_name):
				224	"""Parses the CWP inclusive count files.
				225
				226	A line should contain the name of the function, the file name with the
				227	declaration, the inclusive count and inclusive count fraction out of the
				228	total extracted inclusive count values.
				229
				230	Args:
				231	file_name: The file containing the inclusive count values of the CWP
				232	functions.
				233
				234	Returns:
				235	A dict containing the inclusive count statistics. The key is the name of
				236	the function and the file name, comma separated. The value represents a
				237	tuple with the object name containing the function declaration, the
				238	inclusive count and inclusive count fraction values, and a marker to
				239	identify if the function is present in one of the benchmark profiles.
				240	"""
				241	cwp_inclusive_count_statistics = defaultdict(lambda: ('', 0, 0.0, 0))
				242
				243	with open(file_name) as input_file:
				244	statistics_reader = csv.DictReader(input_file, delimiter=',')
				245	for statistic in statistics_reader:
				246	function_name = statistic['function']
				247	file_name = MakeCWPAndPprofFileNamesConsistent(
				248	os.path.normpath(statistic['file']))
				249	dso_name = statistic['dso']
				250	inclusive_count = statistic['inclusive_count']
				251	inclusive_count_fraction = statistic['inclusive_count_fraction']
				252
				253	# We ignore the lines that have empty fields(i.e they specify only the
				254	# addresses of the functions and the inclusive counts values).
				255	if all([
				256	function_name, file_name, dso_name, inclusive_count,
				257	inclusive_count_fraction
				258	]):
				259	key = '%s,%s' % (function_name, file_name)
				260
				261	# There might be situations where a function appears in multiple files
				262	# or objects. Such situations can occur when in the Dremel queries there
				263	# are not specified the Chrome OS version and the name of the board (i.e
				264	# the files can belong to different kernel or library versions).
				265	inclusive_count_sum = \
				266	cwp_inclusive_count_statistics[key][1] + int(inclusive_count)
				267	inclusive_count_fraction_sum = \
				268	cwp_inclusive_count_statistics[key][2] + \
				269	float(inclusive_count_fraction)
				270
				271	# All the functions are initially marked as EXTRA_FUNCTION.
				272	value = \
				273	(dso_name, inclusive_count_sum, inclusive_count_fraction_sum,
				274	EXTRA_FUNCTION)
				275	cwp_inclusive_count_statistics[key] = value
				276
				277	return cwp_inclusive_count_statistics
				278
				279
				280	def ParseCWPPairwiseInclusiveCountFile(file_name):
				281	"""Parses the CWP pairwise inclusive count files.
				282
				283	A line of the file should contain a pair of a parent and a child function,
				284	concatenated by the PARENT_CHILD_FUNCTIONS_SEPARATOR, the name of the file
				285	where the child function is declared and the inclusive count fractions of
				286	the pair of functions out of the total amount of inclusive count values.
				287
				288	Args:
				289	file_name: The file containing the pairwise inclusive_count statistics of
				290	the
				291	CWP functions.
				292
				293	Returns:
				294	A dict containing the statistics of the parent functions and each of
				295	their child functions. The key of the dict is the name of the parent
				296	function. The value is a dict having as a key the name of the child
				297	function with its file name separated by a ',' and as a value the
				298	inclusive count value of the parent-child function pair.
				299	"""
				300	pairwise_inclusive_count_statistics = defaultdict(lambda: defaultdict(float))
				301
				302	with open(file_name) as input_file:
				303	statistics_reader = csv.DictReader(input_file, delimiter=',')
				304
				305	for statistic in statistics_reader:
				306	parent_function_name, child_function_name = \
				307	statistic['parent_child_functions'].split(
				308	PARENT_CHILD_FUNCTIONS_SEPARATOR)
				309	child_function_file_name = MakeCWPAndPprofFileNamesConsistent(
				310	os.path.normpath(statistic['child_function_file']))
				311	inclusive_count = statistic['inclusive_count']
				312
				313	# There might be situations where a child function appears in
				314	# multiple files or objects. Such situations can occur when in the
				315	# Dremel queries are not specified the Chrome OS version and the
				316	# name of the board (i.e the files can belong to different kernel or
				317	# library versions), when the child function is a template function
				318	# that is declared in a header file or there are name collisions
				319	# between multiple executable objects.
				320	# If a pair of child and parent functions appears multiple times, we
				321	# add their inclusive count values.
				322	child_function_key = ','.join(
				323	[child_function_name, child_function_file_name])
				324	pairwise_inclusive_count_statistics[parent_function_name] \
				325	[child_function_key] += float(inclusive_count)
				326
				327	return pairwise_inclusive_count_statistics