| /** |
| * @file opannotate.cpp |
| * Implement opannotate utility |
| * |
| * @remark Copyright 2003 OProfile authors |
| * @remark Read the file COPYING |
| * |
| * @author John Levon |
| * @author Philippe Elie |
| */ |
| |
| #include <iostream> |
| #include <sstream> |
| #include <algorithm> |
| #include <iomanip> |
| #include <fstream> |
| #include <utility> |
| |
| #include "op_exception.h" |
| #include "op_header.h" |
| #include "profile.h" |
| #include "populate.h" |
| #include "op_sample_file.h" |
| #include "cverb.h" |
| #include "string_manip.h" |
| #include "demangle_symbol.h" |
| #include "child_reader.h" |
| #include "op_file.h" |
| #include "file_manip.h" |
| #include "arrange_profiles.h" |
| #include "opannotate_options.h" |
| #include "profile_container.h" |
| #include "symbol_sort.h" |
| #include "image_errors.h" |
| |
| using namespace std; |
| using namespace options; |
| |
| namespace { |
| |
| size_t nr_events; |
| |
| scoped_ptr<profile_container> samples; |
| |
| /// how opannotate was invoked |
| string cmdline; |
| |
| /// empty annotation fill string |
| string annotation_fill; |
| |
| /// string used as start / end comment to annotate source |
| string const begin_comment("/* "); |
| string const in_comment(" * "); |
| string const end_comment(" */"); |
| |
| /// field width for the sample count |
| unsigned int const count_width = 6; |
| |
| string get_annotation_fill() |
| { |
| string str; |
| |
| for (size_t i = 0; i < nr_events; ++i) { |
| str += string(count_width, ' ') + ' '; |
| str += string(percent_width, ' '); |
| } |
| |
| for (size_t i = 1; i < nr_events; ++i) { |
| str += " "; |
| } |
| |
| str += " :"; |
| return str; |
| } |
| |
| |
| symbol_entry const * find_symbol(string const & image_name, |
| string const & str_vma) |
| { |
| // do not use the bfd equivalent: |
| // - it does not skip space at begin |
| // - we does not need cross architecture compile so the native |
| // strtoull must work, assuming unsigned long long can contain a vma |
| // and on 32/64 bits box bfd_vma is 64 bits |
| bfd_vma vma = strtoull(str_vma.c_str(), NULL, 16); |
| |
| return samples->find_symbol(image_name, vma); |
| } |
| |
| |
| void output_info(ostream & out) |
| { |
| out << begin_comment << '\n'; |
| |
| out << in_comment << "Command line: " << cmdline << '\n' |
| << in_comment << '\n'; |
| |
| out << in_comment << "Interpretation of command line:" << '\n'; |
| |
| if (!assembly) { |
| out << in_comment |
| << "Output annotated source file with samples" << '\n'; |
| |
| if (options::threshold != 0) { |
| out << in_comment |
| << "Output files where samples count reach " |
| << options::threshold << "% of the samples\n"; |
| } else { |
| out << in_comment << "Output all files" << '\n'; |
| } |
| } else { |
| out << in_comment |
| << "Output annotated assembly listing with samples" |
| << '\n'; |
| |
| if (!objdump_params.empty()) { |
| out << in_comment << "Passing the following " |
| "additional arguments to objdump ; \""; |
| for (size_t i = 0 ; i < objdump_params.size() ; ++i) |
| out << objdump_params[i] << " "; |
| out << "\"" << '\n'; |
| } |
| } |
| |
| out << in_comment << '\n'; |
| |
| out << in_comment << classes.cpuinfo << endl; |
| if (!classes.event.empty()) |
| out << in_comment << classes.event << endl; |
| |
| for (size_t i = 0; i < classes.v.size(); ++i) |
| out << in_comment << classes.v[i].longname << endl; |
| |
| out << end_comment << '\n'; |
| } |
| |
| |
| string count_str(count_array_t const & count, |
| count_array_t const & total) |
| { |
| ostringstream os; |
| for (size_t i = 0; i < nr_events; ++i) { |
| os << setw(count_width) << count[i] << ' '; |
| |
| os << format_percent(op_ratio(count[i], total[i]) * 100.0, |
| percent_int_width, percent_fract_width); |
| } |
| return os.str(); |
| } |
| |
| |
| string asm_line_annotation(symbol_entry const * last_symbol, |
| string const & value) |
| { |
| // do not use the bfd equivalent: |
| // - it does not skip space at begin |
| // - we does not need cross architecture compile so the native |
| // strtoull must work, assuming unsigned long long can contain a vma |
| // and on 32/64 bits box bfd_vma is 64 bits |
| // gcc 2.91.66 workaround |
| bfd_vma vma = 0; |
| vma = strtoull(value.c_str(), NULL, 16); |
| |
| string str; |
| |
| sample_entry const * sample = samples->find_sample(last_symbol, vma); |
| if (sample) { |
| str += count_str(sample->counts, samples->samples_count()); |
| for (size_t i = 1; i < nr_events; ++i) |
| str += " "; |
| str += " :"; |
| } else { |
| str = annotation_fill; |
| } |
| |
| return str; |
| } |
| |
| |
| string symbol_annotation(symbol_entry const * symbol) |
| { |
| if (!symbol) |
| return string(); |
| |
| string annot = count_str(symbol->sample.counts, |
| samples->samples_count()); |
| if (annot.empty()) |
| return string(); |
| |
| string const & symname = symbol_names.demangle(symbol->name); |
| |
| string str = " "; |
| str += begin_comment + symname + " total: "; |
| str += count_str(symbol->sample.counts, samples->samples_count()); |
| str += end_comment; |
| return str; |
| } |
| |
| |
/// Tell whether str is a symbol line in objdump formatting, that is a line
/// of the form "08030434 <symbol_name>:". pos is the index of the first
/// character following the address. The test is strict on purpose so that
/// a source line is not mistaken for a symbol line.
bool is_symbol_line(std::string const & str, std::string::size_type pos)
{
	std::string::size_type const len = str.length();

	// " <" must fit after the address
	if (pos >= len || len - pos < 2)
		return false;

	bool const opens_symbol = str[pos] == ' ' && str[pos + 1] == '<';

	return opens_symbol && str[len - 1] == ':';
}
| |
| |
| symbol_entry const * output_objdump_asm_line(symbol_entry const * last_symbol, |
| string const & app_name, string const & str, |
| symbol_collection const & symbols, |
| bool & do_output) |
| { |
| // output of objdump is a human readable form and can contain some |
| // ambiguity so this code is dirty. It is also optimized a little bit |
| // so it is difficult to simplify it without breaking something ... |
| |
| // line of interest are: "[:space:]*[:xdigit:]?[ :]", the last char of |
| // this regexp dis-ambiguate between a symbol line and an asm line. If |
| // source contain line of this form an ambiguity occur and we rely on |
| // the robustness of this code. |
| |
| size_t pos = 0; |
| while (pos < str.length() && isspace(str[pos])) |
| ++pos; |
| |
| if (pos == str.length() || !isxdigit(str[pos])) { |
| if (do_output) { |
| cout << annotation_fill << str << '\n'; |
| return last_symbol; |
| } |
| } |
| |
| while (pos < str.length() && isxdigit(str[pos])) |
| ++pos; |
| |
| if (pos == str.length() || (!isspace(str[pos]) && str[pos] != ':')) { |
| if (do_output) { |
| cout << annotation_fill << str << '\n'; |
| return last_symbol; |
| } |
| } |
| |
| if (is_symbol_line(str, pos)) { |
| last_symbol = find_symbol(app_name, str); |
| |
| // ! complexity: linear in number of symbol must use sorted |
| // by address vector and lower_bound ? |
| // Note this use a pointer comparison. It work because symbols |
| // pointer are unique |
| if (find(symbols.begin(), symbols.end(), last_symbol) |
| != symbols.end()) { |
| do_output = true; |
| } else { |
| do_output = false; |
| } |
| |
| if (do_output) |
| cout << str << symbol_annotation(last_symbol) << '\n'; |
| |
| } else { |
| // not a symbol, probably an asm line. |
| if (do_output) |
| cout << asm_line_annotation(last_symbol, str) |
| << str << '\n'; |
| } |
| |
| return last_symbol; |
| } |
| |
| |
| void do_one_output_objdump(symbol_collection const & symbols, |
| string const & app_name, bfd_vma start, bfd_vma end) |
| { |
| vector<string> args; |
| |
| args.push_back("-d"); |
| args.push_back("--no-show-raw-insn"); |
| if (source) |
| args.push_back("-S"); |
| |
| if (start || end != ~(bfd_vma)0) { |
| ostringstream arg1, arg2; |
| arg1 << "--start-address=" << start; |
| arg2 << "--stop-address=" << end; |
| args.push_back(arg1.str()); |
| args.push_back(arg2.str()); |
| } |
| |
| if (!objdump_params.empty()) { |
| for (size_t i = 0 ; i < objdump_params.size() ; ++i) |
| args.push_back(objdump_params[i]); |
| } |
| |
| args.push_back(app_name); |
| child_reader reader("objdump", args); |
| if (reader.error()) { |
| cerr << "An error occur during the execution of objdump:\n\n"; |
| cerr << reader.error_str() << endl; |
| return; |
| } |
| |
| // to filter output of symbols (filter based on command line options) |
| bool do_output = true; |
| |
| symbol_entry const * last_symbol = 0; |
| string str; |
| while (reader.getline(str)) { |
| last_symbol = output_objdump_asm_line(last_symbol, app_name, |
| str, symbols, do_output); |
| } |
| |
| // objdump always returns SUCCESS so we must rely on the stderr state |
| // of objdump. If objdump error message is cryptic our own error |
| // message will be probably also cryptic |
| ostringstream std_err; |
| ostringstream std_out; |
| reader.get_data(std_out, std_err); |
| if (std_err.str().length()) { |
| cerr << "An error occur during the execution of objdump:\n\n"; |
| cerr << std_err.str() << endl; |
| return ; |
| } |
| |
| // force error code to be acquired |
| reader.terminate_process(); |
| |
| // required because if objdump stop by signal all above things suceeed |
| // (signal error message are not output through stdout/stderr) |
| if (reader.error()) { |
| cerr << "An error occur during the execution of objdump:\n\n"; |
| cerr << reader.error_str() << endl; |
| return; |
| } |
| } |
| |
| |
| void output_objdump_asm(symbol_collection const & symbols, |
| string const & app_name) |
| { |
| // this is only an optimisation, we can either filter output by |
| // directly calling objdump and rely on the symbol filtering or |
| // we can call objdump with the right parameter to just disassemble |
| // the needed part. This is a real win only when calling objdump |
| // a medium number of times, I dunno if the used threshold is optimal |
| // but it is a conservative value. |
| size_t const max_objdump_exec = 50; |
| if (symbols.size() <= max_objdump_exec) { |
| symbol_collection::const_iterator cit = symbols.begin(); |
| symbol_collection::const_iterator end = symbols.end(); |
| for (; cit != end; ++cit) { |
| bfd_vma start = (*cit)->sample.vma; |
| bfd_vma end = start + (*cit)->size; |
| do_one_output_objdump(symbols, app_name, start, end); |
| } |
| } else { |
| do_one_output_objdump(symbols, app_name, 0, ~bfd_vma(0)); |
| } |
| } |
| |
| |
| bool output_asm(string const & app_name) |
| { |
| profile_container::symbol_choice choice; |
| choice.threshold = options::threshold; |
| choice.image_name = app_name; |
| choice.match_image = true; |
| symbol_collection symbols = samples->select_symbols(choice); |
| |
| if (!symbols.empty()) { |
| sort_options options; |
| options.add_sort_option(sort_options::sample); |
| options.sort(symbols, false, false); |
| |
| output_info(cout); |
| |
| output_objdump_asm(symbols, app_name); |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| string const source_line_annotation(debug_name_id filename, size_t linenr) |
| { |
| string str; |
| |
| count_array_t counts = samples->samples_count(filename, linenr); |
| if (!counts.zero()) { |
| str += count_str(counts, samples->samples_count()); |
| for (size_t i = 1; i < nr_events; ++i) |
| str += " "; |
| str += " :"; |
| } else { |
| str = annotation_fill; |
| } |
| |
| return str; |
| } |
| |
| |
| string source_symbol_annotation(debug_name_id filename, size_t linenr) |
| { |
| symbol_entry const * symbol = samples->find_symbol(filename, linenr); |
| |
| return symbol_annotation(symbol); |
| } |
| |
| |
| void output_per_file_info(ostream & out, debug_name_id filename, |
| count_array_t const & total_file_count) |
| { |
| out << begin_comment << '\n' |
| << in_comment << "Total samples for file : " |
| << '"' << debug_names.name(filename) << '"' |
| << '\n'; |
| out << in_comment << '\n' << in_comment |
| << count_str(total_file_count, samples->samples_count()) |
| << '\n'; |
| out << end_comment << '\n' << '\n'; |
| } |
| |
| |
| string const line0_info(debug_name_id filename) |
| { |
| string annotation = source_line_annotation(filename, 0); |
| if (trim(annotation, " \t:").empty()) |
| return string(); |
| |
| string str = "<credited to line zero> "; |
| str += annotation; |
| return str; |
| } |
| |
| |
| void do_output_one_file(ostream & out, istream & in, debug_name_id filename, |
| bool header) |
| { |
| count_array_t count = samples->samples_count(filename); |
| |
| if (header) { |
| output_per_file_info(out, filename, count); |
| out << line0_info(filename) << '\n'; |
| } |
| |
| |
| if (in) { |
| string str; |
| |
| for (size_t linenr = 1 ; getline(in, str) ; ++linenr) { |
| out << source_line_annotation(filename, linenr) << str |
| << source_symbol_annotation(filename, linenr) |
| << '\n'; |
| } |
| |
| } else { |
| // FIXME : we have no input file : we just outputfooter |
| // so on user can known total nr of samples for this source |
| // later we must add code that iterate through symbol in this |
| // file to output one annotation for each symbol. To do this we |
| // need a select_symbol(filename); in profile_container which |
| // fall back to the implementation in symbol_container |
| // using a lazilly build symbol_map sorted by filename |
| // (necessary functors already exist in symbol_functors.h) |
| } |
| |
| if (!header) { |
| output_per_file_info(out, filename, count); |
| out << line0_info(filename) << '\n'; |
| } |
| } |
| |
| |
| void output_one_file(istream & in, debug_name_id filename, |
| string const & source) |
| { |
| if (output_dir.empty()) { |
| do_output_one_file(cout, in, filename, true); |
| return; |
| } |
| |
| string const out_file = op_realpath(output_dir + source); |
| |
| /* Just because you're paranoid doesn't mean they're not out to |
| * get you ... |
| * |
| * This is just a lame final safety check. If we found the |
| * source, then "source" should be canonical already, and |
| * can't escape from the output dir. We can't use op_realpath() |
| * alone as that needs the file to exist already. |
| * |
| * Let's not complain again if we couldn't find the file anyway. |
| */ |
| if (out_file.find("/../") != string::npos) { |
| if (in) { |
| cerr << "refusing to create non-canonical filename " |
| << out_file << endl; |
| } |
| return; |
| } else if (!is_prefix(out_file, output_dir)) { |
| if (in) { |
| cerr << "refusing to create file " << out_file |
| << " outside of output directory " << output_dir |
| << endl; |
| } |
| return; |
| } |
| |
| if (is_files_identical(out_file, source)) { |
| cerr << "input and output files are identical: " |
| << out_file << endl; |
| return; |
| } |
| |
| if (create_path(out_file.c_str())) { |
| cerr << "unable to create file: " |
| << '"' << op_dirname(out_file) << '"' << endl; |
| return; |
| } |
| |
| ofstream out(out_file.c_str()); |
| if (!out) { |
| cerr << "unable to open output file " |
| << '"' << out_file << '"' << endl; |
| } else { |
| do_output_one_file(out, in, filename, false); |
| output_info(out); |
| } |
| } |
| |
| |
| /* Locate a source file from debug info, which may be relative */ |
| string const locate_source_file(debug_name_id filename_id) |
| { |
| string const origfile = debug_names.name(filename_id); |
| string file = origfile; |
| |
| if (file.empty()) |
| return file; |
| |
| /* Allow absolute paths to be relocated to a different directory */ |
| if (file[0] == '/') { |
| vector<string>::const_iterator cit = base_dirs.begin(); |
| vector<string>::const_iterator end = base_dirs.end(); |
| for (; cit != end; ++cit) { |
| string path = op_realpath(*cit); |
| |
| if (is_prefix(file, path)) { |
| file = file.substr(path.length()); |
| break; |
| } |
| } |
| } |
| |
| vector<string>::const_iterator cit = search_dirs.begin(); |
| vector<string>::const_iterator end = search_dirs.end(); |
| |
| for (; cit != end; ++cit) { |
| string const absfile = op_realpath(*cit + "/" + file); |
| |
| if (op_file_readable(absfile)) { |
| return absfile; |
| } |
| } |
| |
| /* We didn't find a relocated absolute file, or a relative file, |
| * assume the original is correct, accounting for the |
| * possibility it's relative the cwd |
| */ |
| return op_realpath(origfile); |
| } |
| |
| |
| void output_source(path_filter const & filter) |
| { |
| bool const separate_file = !output_dir.empty(); |
| |
| if (!separate_file) |
| output_info(cout); |
| |
| vector<debug_name_id> filenames = |
| samples->select_filename(options::threshold); |
| |
| for (size_t i = 0 ; i < filenames.size() ; ++i) { |
| string const & source = locate_source_file(filenames[i]); |
| |
| if (!filter.match(source)) |
| continue; |
| |
| ifstream in(source.c_str()); |
| |
| // it is common to have empty filename due to the lack |
| // of debug info (eg _init function) so warn only |
| // if the filename is non empty. The case: no debug |
| // info at all has already been checked. |
| if ((!in) && source.length()) { |
| cerr << "opannotate (warning): unable to open for " |
| "reading: " << source << endl; |
| } |
| |
| if (source.length()) { |
| output_one_file(in, filenames[i], source); |
| } |
| } |
| } |
| |
| |
| bool annotate_source(list<string> const & images) |
| { |
| annotation_fill = get_annotation_fill(); |
| |
| if (!output_dir.empty()) { |
| |
| if (create_path(output_dir.c_str())) { |
| cerr << "unable to create " << output_dir |
| << " directory: " << endl; |
| return false; |
| } |
| |
| // Make sure we have an absolute path. |
| output_dir = op_realpath(output_dir); |
| if (output_dir.length() && |
| output_dir[output_dir.length() - 1] != '/') |
| output_dir += '/'; |
| |
| /* Don't let the user stomp on their sources */ |
| if (output_dir == "/") { |
| cerr << "Output path of / would over-write the " |
| "source files" << endl; |
| return false; |
| } |
| } |
| |
| if (assembly) { |
| bool some_output = false; |
| |
| list<string>::const_iterator it = images.begin(); |
| list<string>::const_iterator const end = images.end(); |
| |
| for (; it != end; ++it) { |
| if (output_asm(*it)) { |
| some_output = true; |
| } |
| } |
| |
| if (!some_output) { |
| // It's the only case we must care since we know the |
| // selected image set is not empty |
| cerr << "selected image set doesn't contain any of " |
| << "the selected symbol\n"; |
| } |
| } else { |
| output_source(file_filter); |
| } |
| |
| return true; |
| } |
| |
| |
| int opannotate(options::spec const & spec) |
| { |
| handle_options(spec); |
| |
| nr_events = classes.v.size(); |
| |
| samples.reset(new profile_container(true, true)); |
| |
| list<string> images; |
| |
| list<inverted_profile> iprofiles |
| = invert_profiles(options::archive_path, classes, |
| options::extra_found_images); |
| |
| report_image_errors(iprofiles); |
| |
| list<inverted_profile>::iterator it = iprofiles.begin(); |
| list<inverted_profile>::iterator const end = iprofiles.end(); |
| |
| bool debug_info = false; |
| for (; it != end; ++it) { |
| bool tmp = false; |
| populate_for_image(options::archive_path, *samples, *it, |
| options::symbol_filter, &tmp); |
| images.push_back(it->image); |
| if (tmp) |
| debug_info = true; |
| } |
| |
| if (!debug_info && !options::assembly) { |
| cerr << "no debug information available for any binary " |
| << "selected and --assembly not requested\n"; |
| exit(EXIT_FAILURE); |
| } |
| |
| annotate_source(images); |
| |
| return 0; |
| } |
| |
| } // anonymous namespace |
| |
| |
| int main(int argc, char const * argv[]) |
| { |
| // set the invocation, for the file headers later |
| for (int i = 0 ; i < argc ; ++i) |
| cmdline += string(argv[i]) + " "; |
| |
| return run_pp_tool(argc, argv, opannotate); |
| } |