| # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Utility functions for parsing pprof, CWP data and Chrome OS groups files.""" |
| |
| from collections import defaultdict |
| |
| import csv |
| import os |
| import re |
| |
# Regular expressions used when parsing the text output of pprof.
#
# A run of dashes, a plus sign and another run of dashes. It is used to detect
# the separator marks in the pprof --tree output.
SEPARATOR_REGEX = re.compile(r'-+\+-+')
# Five whitespace separated fields, the 2nd, 3rd and 5th being followed by a
# percent sign. The parsers unpack them as flat, flat percentage, sum
# percentage, cumulative and cumulative percentage.
FUNCTION_STATISTIC_REGEX = re.compile(
    r'(\S+)\s+(\S+)%\s+(\S+)%\s+(\S+)\s+(\S+)%')
# A decimal number immediately followed by a percent sign.
CHILD_FUNCTION_PERCENTAGE_REGEX = re.compile(r'([0-9.]+)%')
# The pipe character and the whitespace that precede a function key.
FUNCTION_KEY_SEPARATOR_REGEX = re.compile(r'\|\s+')

# Markers used to identify whether a function is common to the pprof and CWP
# files or appears only in one of them.
COMMON_FUNCTION = 'common'
EXTRA_FUNCTION = 'extra'

# Separator placed between the parent and the child function names.
PARENT_CHILD_FUNCTIONS_SEPARATOR = ';;'

# List of (old, new) string pairs used to make substitutions in file names so
# that the CWP and pprof data are consistent. The substitutions are applied in
# the order in which they are listed.
FILE_NAME_REPLACING_PAIR_STRINGS = [
    ('gnawty', 'BOARD'),
    ('amd64-generic', 'BOARD'),
    (' ../sysdeps', ',sysdeps'),
    (' ../nptl', ',nptl'),
    (' aes-x86_64.s', ',aes-x86_64.s'),
    (' (inline)', ''),
    (' (partial-inline)', ''),
    (' ../', ','),
    ('../', ''),
]

# Separator used to delimit the function from the file name.
FUNCTION_FILE_SEPARATOR = ' /'
| |
| |
def MakeCWPAndPprofFileNamesConsistent(file_name):
  """Rewrites a file name so that CWP and pprof spell it the same way.

  For the same function, the file path reported in the CWP data may differ
  slightly from the one in the pprof output. Every ', ' is first replaced by
  '; ', then each (old, new) pair of FILE_NAME_REPLACING_PAIR_STRINGS is
  applied in order, replacing all the occurrences of old with new.

  Args:
    file_name: A string representing the name of the file.

  Returns:
    A string representing the modified name of the file.
  """
  consistent_name = file_name.replace(', ', '; ')

  for old_text, new_text in FILE_NAME_REPLACING_PAIR_STRINGS:
    consistent_name = consistent_name.replace(old_text, new_text)

  return consistent_name
| |
def MakePprofFunctionKey(function_and_file_name):
  """Creates the function key from the function and file name.

  Parsing the pprof --top and --tree outputs is difficult because it is hard
  to extract the function and the file name (i.e. the function names can have
  a lot of unexpected characters such as spaces, operators etc).
  For the moment, FUNCTION_FILE_SEPARATOR is used as the delimiter between the
  function and the file name. There are some cases where the file name does
  not start with / and these are handled only through the string
  substitutions (i.e. ../sysdeps, ../nptl, aes-x86_64.s).

  Args:
    function_and_file_name: A string representing the function and the file
      name as it appears in the pprof output.

  Returns:
    A string representing the function key, composed from the function and
    file name, comma separated.
  """
  # TODO(evelinad): Use pprof --topproto instead of pprof --top to parse
  # protobuffers instead of text output. Investigate if there is an equivalent
  # for pprof --tree that gives protobuffer output.
  #
  # In the CWP output, we replace the , with ; as a workaround for parsing
  # csv files. We do the same for the pprof output.
  #
  # TODO(evelinad): Use dremel --csv_dialect=excel-tab in the queries for
  # replacing the , delimiter with tab.
  function_and_file_name = function_and_file_name.replace(', ', '; ')

  # Without the separator there is no path to normalize, so only the string
  # substitutions are applied.
  if FUNCTION_FILE_SEPARATOR not in function_and_file_name:
    return MakeCWPAndPprofFileNamesConsistent(function_and_file_name)

  # The file name is the trailing component, while the function name may itself
  # contain the separator (spaces and operators are allowed in it). Splitting
  # on the last occurrence keeps the whole function name together and never
  # fails when the separator shows up more than once.
  function_name, _, file_name = \
      function_and_file_name.rpartition(FUNCTION_FILE_SEPARATOR)
  # The separator swallowed the leading /, so it is put back before the path
  # is normalized and the substitutions are made.
  file_name = \
      MakeCWPAndPprofFileNamesConsistent(os.path.normpath('/' + file_name))

  return ','.join([function_name, file_name])
| |
| |
def ComputeCWPCummulativeInclusiveStatistics(cwp_inclusive_count_statistics):
  """Computes the cumulative inclusive count value of a function.

  A function might appear declared in multiple files or objects. When
  computing the fraction of the inclusive count value from a child function to
  the parent function, we take into consideration the sum of the inclusive
  count values from all the occurrences of that function.

  Args:
    cwp_inclusive_count_statistics: A dict containing the inclusive count
      statistics extracted by the ParseCWPInclusiveCountFile method. A key is
      the function name and the file name, comma separated, and the element at
      index 1 of a value is the inclusive count.

  Returns:
    A dict having as a key the name of the function and as a value the sum of
    the inclusive count values of the occurrences of the function from all
    the files and objects.

  Raises:
    ValueError: If a key does not contain exactly one comma.
  """
  cwp_inclusive_count_statistics_cumulative = defaultdict(int)

  # items() is available on both Python 2 and Python 3 dicts, unlike
  # iteritems().
  for function_key, function_statistics in \
      cwp_inclusive_count_statistics.items():
    function_name, _ = function_key.split(',')
    cwp_inclusive_count_statistics_cumulative[function_name] += \
        function_statistics[1]

  return cwp_inclusive_count_statistics_cumulative
| |
def ComputeCWPChildFunctionsFractions(cwp_inclusive_count_statistics_cumulative,
                                      cwp_pairwise_inclusive_count_statistics):
  """Computes the fractions of the inclusive count values for child functions.

  The fraction represents the inclusive count value of a child function over
  the one of the parent function. When the parent function is missing from
  the cumulative statistics, or its cumulative inclusive count is zero, the
  fraction of each of its child functions is 0.0.

  Args:
    cwp_inclusive_count_statistics_cumulative: A dict containing the
      cumulative inclusive count values of the CWP functions.
    cwp_pairwise_inclusive_count_statistics: A dict containing the inclusive
      count statistics for pairs of parent and child functions. The key is the
      parent function. The value is a dict with the key the name of the child
      function and the file name, comma separated, and the value is the
      inclusive count value of the pair of parent and child functions.

  Returns:
    A dict containing the inclusive count statistics for pairs of parent
    and child functions. The key is the parent function. The value is a
    dict with the key the name of the child function and the file name,
    comma separated, and the value is the inclusive count fraction of the
    child function out of the parent function.
  """
  pairwise_inclusive_count_fractions = {}

  # items() is available on both Python 2 and Python 3 dicts, unlike
  # iteritems().
  for parent_function_key, child_functions_metrics in \
      cwp_pairwise_inclusive_count_statistics.items():
    # get() is used instead of indexing so that a defaultdict argument does not
    # grow a new entry for every unknown parent.
    parent_function_inclusive_count = \
        cwp_inclusive_count_statistics_cumulative.get(parent_function_key, 0)
    child_functions_fractions = {}

    for child_function_key, child_function_inclusive_count in \
        child_functions_metrics.items():
      if parent_function_inclusive_count:
        # float() keeps the division a true division on Python 2 even when
        # both counts are integers.
        child_functions_fractions[child_function_key] = \
            child_function_inclusive_count / \
            float(parent_function_inclusive_count)
      else:
        # A missing parent and a parent with a zero count are treated alike,
        # which also avoids dividing by zero.
        child_functions_fractions[child_function_key] = 0.0

    pairwise_inclusive_count_fractions[parent_function_key] = \
        child_functions_fractions

  return pairwise_inclusive_count_fractions
| |
def ParseFunctionGroups(cwp_function_groups_lines):
  """Turns the lines of the function groups file into a list of tuples.

  Args:
    cwp_function_groups_lines: A list of the lines contained in the CWP
      function groups file. A line contains the group name and the file path
      that describes the group, separated by a space.

  Returns:
    A list with one tuple per line, in the order of the lines. A tuple holds
    the whitespace separated fields of the line, i.e. the group name and the
    file path.
  """
  # The order of the groups mentioned in the cwp_function_groups file
  # matters. A function declared in a file will belong to the first
  # mentioned group that matches its path to the one of the file.
  # It is possible to have multiple paths that belong to the same group.
  function_groups = []

  for group_line in cwp_function_groups_lines:
    group_fields = group_line.split()
    function_groups.append(tuple(group_fields))

  return function_groups
| |
| |
def ParsePprofTopOutput(file_name):
  """Parses a file that contains the output of the pprof --top command.

  Lines that do not contain the function statistics (e.g. blank lines) are
  skipped.

  Args:
    file_name: The name of the file containing the pprof --top output.

  Returns:
    A dict having as a key the name of the function and the file containing
    the declaration of the function, separated by a comma, and as a value
    a tuple containing the flat, flat percentage, sum percentage, cumulative
    and cumulative percentage values. All the values are strings and the
    percentages are converted to fractions (i.e. divided by 100).

  Raises:
    ValueError: If one of the percentage fields is not a number.
  """
  pprof_top_statistics = {}

  # The first 6 lines of the pprof top output are skipped; the statistics of
  # the functions come after them.
  with open(file_name) as input_file:
    pprof_top_content = input_file.readlines()[6:]

  for line in pprof_top_content:
    function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)

    # search() returns None for a line without statistics; such a line carries
    # nothing to record.
    if not function_statistic_match:
      continue

    flat, flat_p, sum_p, cum, cum_p = function_statistic_match.groups()
    flat_p = str(float(flat_p) / 100.0)
    sum_p = str(float(sum_p) / 100.0)
    cum_p = str(float(cum_p) / 100.0)

    # The function and file name start 2 characters after the statistics. Only
    # the line terminator is stripped, so the last character is not lost when
    # the final line of the file does not end with a newline.
    lookup_index = function_statistic_match.end() + 2
    function_and_file_name = line[lookup_index:].rstrip('\r\n')
    key = MakePprofFunctionKey(function_and_file_name)
    pprof_top_statistics[key] = (flat, flat_p, sum_p, cum, cum_p)

  return pprof_top_statistics
| |
| |
def ParsePprofTreeOutput(file_name):
  """Parses a file that contains the output of the pprof --tree command.

  Lines that cannot be parsed (e.g. blank lines or lines without the function
  key separator) are skipped.

  Args:
    file_name: The name of the file containing the pprof --tree output.

  Returns:
    A dict including the statistics for pairs of parent and child functions.
    The key is the name of the parent function and the file where the
    function is declared, separated by a comma. The value is a dict having as
    a key the name of the child function and the file where the function is
    declared, comma separated and as a value the fraction of time the
    parent function spends in the child function (i.e. the percentage divided
    by 100).
  """
  # The first 9 lines of the pprof tree output are skipped; the statistics of
  # the functions come after them.
  with open(file_name) as input_file:
    pprof_tree_content = input_file.readlines()[9:]

  pprof_tree_statistics = defaultdict(lambda: defaultdict(float))
  track_child_functions = False
  parent_function_key = None

  # The statistics of a given function, its parent and child functions are
  # included between two separator marks.
  # All the parent function statistics are above the line containing the
  # statistics of the given function.
  # All the statistics of a child function are below the statistics of the
  # given function.
  # The statistics of a parent or a child function contain the calls, calls
  # percentage, the function name and the file where the function is declared.
  # The statistics of the given function contain the flat, flat percentage,
  # sum percentage, cumulative, cumulative percentage, function name and the
  # name of the file containing the declaration of the function.
  for line in pprof_tree_content:
    if SEPARATOR_REGEX.search(line):
      track_child_functions = False
      continue

    parent_function_statistic_match = FUNCTION_STATISTIC_REGEX.search(line)

    if parent_function_statistic_match:
      parent_function_key_match = FUNCTION_KEY_SEPARATOR_REGEX.search(
          line, pos=parent_function_statistic_match.end())

      if not parent_function_key_match:
        # The function of this section cannot be identified, so the lines that
        # follow cannot be attributed to it.
        track_child_functions = False
        continue

      track_child_functions = True
      # Only the line terminator is stripped, so the last character of the
      # key is kept when the line does not end with a newline.
      parent_function_key = MakePprofFunctionKey(
          line[parent_function_key_match.end():].rstrip('\r\n'))
      continue

    # The lines above the given function describe its parents and are ignored.
    if not track_child_functions:
      continue

    child_function_statistic_match = \
        CHILD_FUNCTION_PERCENTAGE_REGEX.search(line)

    # search() returns None for a line without a percentage (e.g. a blank
    # line); such a line does not describe a child function.
    if not child_function_statistic_match:
      continue

    child_function_key_match = FUNCTION_KEY_SEPARATOR_REGEX.search(
        line, pos=child_function_statistic_match.end())

    if not child_function_key_match:
      continue

    child_function_percentage = \
        float(child_function_statistic_match.group(1))
    child_function_key = MakePprofFunctionKey(
        line[child_function_key_match.end():].rstrip('\r\n'))

    # A child function that appears several times under the same parent has
    # its fractions added together.
    pprof_tree_statistics[parent_function_key][child_function_key] += \
        child_function_percentage / 100.0

  return pprof_tree_statistics
| |
| |
def ParseCWPInclusiveCountFile(file_name):
  """Parses the CWP inclusive count files.

  A line should contain the name of the function, the file name with the
  declaration, the inclusive count and inclusive count fraction out of the
  total extracted inclusive count values.

  The file is read as a comma separated file with a header line that names
  the columns function, file, dso, inclusive_count and
  inclusive_count_fraction. The lines with an empty or missing field are
  ignored.

  Args:
    file_name: The file containing the inclusive count values of the CWP
      functions.

  Returns:
    A dict containing the inclusive count statistics. The key is the name of
    the function and the file name, comma separated. The value represents a
    tuple with the object name containing the function declaration, the
    inclusive count and inclusive count fraction values, and a marker to
    identify if the function is present in one of the benchmark profiles.

  Raises:
    KeyError: If one of the expected columns is missing from the header.
    ValueError: If an inclusive count or fraction is not a number.
  """
  cwp_inclusive_count_statistics = defaultdict(lambda: ('', 0, 0.0, 0))

  with open(file_name) as input_file:
    statistics_reader = csv.DictReader(input_file, delimiter=',')

    for statistic in statistics_reader:
      function_name = statistic['function']
      raw_file_name = statistic['file']
      dso_name = statistic['dso']
      inclusive_count = statistic['inclusive_count']
      inclusive_count_fraction = statistic['inclusive_count_fraction']

      # We ignore the lines that have empty fields (i.e. they specify only the
      # addresses of the functions and the inclusive count values). The check
      # is made on the raw file field because os.path.normpath turns an empty
      # path into '.', which would hide the fact that the field is empty.
      if not all([
          function_name, raw_file_name, dso_name, inclusive_count,
          inclusive_count_fraction
      ]):
        continue

      function_file_name = MakeCWPAndPprofFileNamesConsistent(
          os.path.normpath(raw_file_name))

      # The substitutions can remove the whole name (e.g. a name made only of
      # ' (inline)'), which leaves the line without a usable file field.
      if not function_file_name:
        continue

      key = '%s,%s' % (function_name, function_file_name)

      # There might be situations where a function appears in multiple files
      # or objects. Such situations can occur when in the Dremel queries there
      # are not specified the Chrome OS version and the name of the board (i.e.
      # the files can belong to different kernel or library versions). In that
      # case the counts and fractions are added up and the object name of the
      # last line read is kept.
      previous_statistics = cwp_inclusive_count_statistics[key]
      inclusive_count_sum = previous_statistics[1] + int(inclusive_count)
      inclusive_count_fraction_sum = \
          previous_statistics[2] + float(inclusive_count_fraction)

      # All the functions are initially marked as EXTRA_FUNCTION.
      cwp_inclusive_count_statistics[key] = \
          (dso_name, inclusive_count_sum, inclusive_count_fraction_sum,
           EXTRA_FUNCTION)

  return cwp_inclusive_count_statistics
| |
| |
def ParseCWPPairwiseInclusiveCountFile(file_name):
  """Parses the CWP pairwise inclusive count files.

  A line of the file should contain a pair of a parent and a child function,
  concatenated by the PARENT_CHILD_FUNCTIONS_SEPARATOR, the name of the file
  where the child function is declared and the inclusive count of the pair of
  functions. The file is read as a comma separated file with a header line
  that names the columns parent_child_functions, child_function_file and
  inclusive_count.

  Args:
    file_name: The file containing the pairwise inclusive count statistics of
      the CWP functions.

  Returns:
    A dict containing the statistics of the parent functions and each of
    their child functions. The key of the dict is the name of the parent
    function. The value is a dict having as a key the name of the child
    function with its file name separated by a ',' and as a value the
    inclusive count value of the parent-child function pair.
  """
  children_by_parent = defaultdict(lambda: defaultdict(float))

  with open(file_name) as input_file:
    for row in csv.DictReader(input_file, delimiter=','):
      function_pair = row['parent_child_functions']
      parent_name, child_name = \
          function_pair.split(PARENT_CHILD_FUNCTIONS_SEPARATOR)
      normalized_child_path = os.path.normpath(row['child_function_file'])
      child_file = MakeCWPAndPprofFileNamesConsistent(normalized_child_path)
      inclusive_count = row['inclusive_count']

      # There might be situations where a child function appears in
      # multiple files or objects. Such situations can occur when the
      # Chrome OS version and the name of the board are not specified in the
      # Dremel queries (i.e. the files can belong to different kernel or
      # library versions), when the child function is a template function
      # that is declared in a header file or when there are name collisions
      # between multiple executable objects.
      # If a pair of child and parent functions appears multiple times, we
      # add their inclusive count values.
      child_key = ','.join([child_name, child_file])
      children_by_parent[parent_name][child_key] += float(inclusive_count)

  return children_by_parent