blob: 741a1f1f569f9e126f8497bf1abdd64c82a4e93e [file] [log] [blame]
/**
* @file opannotate.cpp
* Implement opannotate utility
*
* @remark Copyright 2003 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon
* @author Philippe Elie
*/
#include <iostream>
#include <sstream>
#include <algorithm>
#include <iomanip>
#include <fstream>
#include <utility>
#include "op_exception.h"
#include "op_header.h"
#include "profile.h"
#include "populate.h"
#include "op_sample_file.h"
#include "cverb.h"
#include "string_manip.h"
#include "demangle_symbol.h"
#include "child_reader.h"
#include "op_file.h"
#include "file_manip.h"
#include "arrange_profiles.h"
#include "opannotate_options.h"
#include "profile_container.h"
#include "symbol_sort.h"
#include "image_errors.h"
using namespace std;
using namespace options;
namespace {
/// number of profile classes being reported; set from classes.v.size() in
/// opannotate() and used as the number of count/percent columns per line
size_t nr_events;
/// container queried for every sample and symbol lookup in this file;
/// allocated in opannotate() and filled there through populate_for_image()
scoped_ptr<profile_container> samples;
/// how opannotate was invoked
string cmdline;
/// empty annotation fill string: the blank prefix written in front of
/// lines that carry no samples. Assigned once from get_annotation_fill()
/// at the start of annotate_source()
string annotation_fill;
/// string used as start / end comment to annotate source
string const begin_comment("/* ");
string const in_comment(" * ");
string const end_comment(" */");
/// field width for the sample count
unsigned int const count_width = 6;
/// Build the blank prefix used for lines that have no samples.
///
/// For each of the nr_events events the prefix holds a blank count field
/// (count_width spaces), one separating space and a blank percent field
/// (percent_width spaces); it then gets one separator per additional event
/// and the closing " :" marker.
///
/// @return the fill string
string get_annotation_fill()
{
	// blank stand-in for the columns of a single event
	string const blank_column =
		string(count_width, ' ') + ' ' + string(percent_width, ' ');

	string fill;
	for (size_t event = 0; event != nr_events; ++event)
		fill += blank_column;

	// one separator for every event after the first
	for (size_t gap = 1; gap < nr_events; ++gap)
		fill += " ";

	fill += " :";
	return fill;
}
/// Look up the symbol of @p image_name located at a textual address.
///
/// @param image_name  image the symbol is searched in
/// @param str_vma     text starting with a hexadecimal address
/// @return whatever samples->find_symbol() returns for that image/address
symbol_entry const * find_symbol(string const & image_name,
				 string const & str_vma)
{
	// The native strtoull() is used on purpose instead of the bfd
	// equivalent:
	// - the bfd one does not skip leading white space
	// - no cross-architecture parsing is needed, so strtoull() works as
	//   long as unsigned long long can hold a vma (bfd_vma is 64 bits on
	//   both 32 and 64 bits boxes)
	char const * const hex_text = str_vma.c_str();
	bfd_vma address = strtoull(hex_text, NULL, 16);
	return samples->find_symbol(image_name, address);
}
/// Write the comment block describing this opannotate run to @p out:
/// the command line, how it was interpreted (source or assembly mode,
/// threshold or extra objdump arguments) and the cpu / event / class
/// descriptions taken from classes.
///
/// @param out  stream receiving the comment block
void output_info(ostream & out)
{
	out << begin_comment << '\n'
	    << in_comment << "Command line: " << cmdline << '\n'
	    << in_comment << '\n'
	    << in_comment << "Interpretation of command line:" << '\n';

	if (assembly) {
		out << in_comment
		    << "Output annotated assembly listing with samples"
		    << '\n';

		if (!objdump_params.empty()) {
			out << in_comment << "Passing the following "
				"additional arguments to objdump ; \"";
			for (size_t arg = 0; arg != objdump_params.size(); ++arg)
				out << objdump_params[arg] << " ";
			out << "\"" << '\n';
		}
	} else {
		out << in_comment
		    << "Output annotated source file with samples" << '\n';

		out << in_comment;
		if (options::threshold == 0) {
			out << "Output all files" << '\n';
		} else {
			out << "Output files where samples count reach "
			    << options::threshold << "% of the samples\n";
		}
	}

	out << in_comment << '\n';

	// these lines are terminated with endl, i.e. they flush the stream
	out << in_comment << classes.cpuinfo << endl;
	if (!classes.event.empty())
		out << in_comment << classes.event << endl;
	for (size_t cls = 0; cls != classes.v.size(); ++cls)
		out << in_comment << classes.v[cls].longname << endl;

	out << end_comment << '\n';
}
/// Format the per-event counts of one annotation.
///
/// For each of the nr_events events this emits the count right-aligned in
/// count_width columns, a space, and op_ratio(count, total) scaled by 100
/// as rendered by format_percent().
///
/// @param count  counts to print, indexed by event
/// @param total  totals the counts are related to, indexed by event
/// @return the concatenated columns; empty when nr_events is zero
string count_str(count_array_t const & count,
		 count_array_t const & total)
{
	ostringstream columns;

	size_t event = 0;
	while (event < nr_events) {
		double const percent =
			op_ratio(count[event], total[event]) * 100.0;

		columns << setw(count_width) << count[event] << ' '
			<< format_percent(percent, percent_int_width,
					  percent_fract_width);
		++event;
	}

	return columns.str();
}
/// Build the annotation prefix for one disassembly line.
///
/// @param last_symbol  symbol the line belongs to
/// @param value        the objdump line; it starts (after optional white
///                     space) with the hexadecimal address of the
///                     instruction
/// @return the formatted counts followed by " :" when a sample is found
///         for that address, annotation_fill otherwise
string asm_line_annotation(symbol_entry const * last_symbol,
			   string const & value)
{
	// The native strtoull() is used on purpose instead of the bfd
	// equivalent: it skips leading white space, and unsigned long long
	// is assumed wide enough to hold a vma.
	bfd_vma vma = strtoull(value.c_str(), NULL, 16);

	sample_entry const * const hit =
		samples->find_sample(last_symbol, vma);
	if (!hit)
		return annotation_fill;

	string annotation = count_str(hit->counts, samples->samples_count());
	// one separator for every event after the first
	for (size_t gap = 1; gap < nr_events; ++gap)
		annotation += " ";
	annotation += " :";
	return annotation;
}
/// Build the trailing comment giving the total sample counts of a symbol.
///
/// @param symbol  symbol to describe, may be null
/// @return an empty string when @p symbol is null or when count_str()
///         yields nothing; otherwise a space followed by a comment of the
///         form "<demangled name> total: <counts>" wrapped in
///         begin_comment / end_comment
string symbol_annotation(symbol_entry const * symbol)
{
	if (!symbol)
		return string();

	// Format the counts once and reuse the text below; the original code
	// called count_str() a second time with the very same arguments.
	string const counts = count_str(symbol->sample.counts,
					samples->samples_count());
	if (counts.empty())
		return string();

	string const & symname = symbol_names.demangle(symbol->name);

	string str = " ";
	str += begin_comment + symname + " total: ";
	str += counts;
	str += end_comment;
	return str;
}
/// Tell whether an objdump output line introduces a symbol.
///
/// Symbol lines have the form "08030434 <symbol_name>:". The check is kept
/// strict so that a source line is not mistaken for a symbol line.
///
/// @param str  the line to examine
/// @param pos  index of the character just after the leading address
/// @return true when str[pos] is a space, str[pos + 1] is '<' and the line
///         ends with ':'
bool is_symbol_line(string const & str, string::size_type pos)
{
	bool const opens_symbol = str[pos] == ' ' && str[pos + 1] == '<';
	return opens_symbol && str[str.length() - 1] == ':';
}
/// Annotate and print one line of objdump output.
///
/// Lines of interest look like "<spaces><hex digits>" followed by a space
/// (symbol line) or a ':' (instruction line). objdump output is meant for
/// humans, so a source line of that shape is ambiguous and this code has
/// to be tolerant.
///
/// @param last_symbol  symbol found on the most recent symbol line
/// @param app_name     image being disassembled
/// @param str          the objdump line
/// @param symbols      symbols selected for output
/// @param do_output    in/out: whether lines are currently printed; it is
///                     updated on each symbol line according to whether
///                     that symbol is in @p symbols
/// @return the symbol subsequent lines belong to
symbol_entry const * output_objdump_asm_line(symbol_entry const * last_symbol,
	string const & app_name, string const & str,
	symbol_collection const & symbols,
	bool & do_output)
{
	// The <cctype> classifiers have undefined behaviour when handed a
	// negative char (e.g. a non-ASCII byte of a source line interleaved
	// by objdump -S), hence the casts to unsigned char.
	size_t pos = 0;
	while (pos < str.length()
	       && isspace(static_cast<unsigned char>(str[pos])))
		++pos;

	// No address at the start: echo the line unannotated. When output is
	// disabled nothing is printed; returning here is equivalent to the
	// former fall-through, which could neither match a symbol line nor
	// print anything in that state.
	if (pos == str.length()
	    || !isxdigit(static_cast<unsigned char>(str[pos]))) {
		if (do_output)
			cout << annotation_fill << str << '\n';
		return last_symbol;
	}

	while (pos < str.length()
	       && isxdigit(static_cast<unsigned char>(str[pos])))
		++pos;

	// The hex digits must be followed by white space or ':'.
	if (pos == str.length()
	    || (!isspace(static_cast<unsigned char>(str[pos]))
		&& str[pos] != ':')) {
		if (do_output)
			cout << annotation_fill << str << '\n';
		return last_symbol;
	}

	if (is_symbol_line(str, pos)) {
		last_symbol = find_symbol(app_name, str);

		// ! complexity: linear in the number of symbols; a vector
		// sorted by address and lower_bound could be used instead.
		// Note this is a pointer comparison; it works because symbol
		// pointers are unique.
		do_output = find(symbols.begin(), symbols.end(), last_symbol)
			!= symbols.end();

		if (do_output)
			cout << str << symbol_annotation(last_symbol) << '\n';
	} else if (do_output) {
		// not a symbol, probably an asm line.
		cout << asm_line_annotation(last_symbol, str)
		     << str << '\n';
	}

	return last_symbol;
}
/// Run objdump once on @p app_name and print its annotated output.
///
/// @param symbols   symbols selected for output, used to filter the listing
/// @param app_name  image handed to objdump
/// @param start     first address to disassemble
/// @param end       address at which disassembly stops; the pair
///                  (0, ~bfd_vma(0)) means no address range is passed
void do_one_output_objdump(symbol_collection const & symbols,
	string const & app_name, bfd_vma start, bfd_vma end)
{
	char const * const failure_banner =
		"An error occur during the execution of objdump:\n\n";

	vector<string> objdump_args;
	objdump_args.push_back("-d");
	objdump_args.push_back("--no-show-raw-insn");
	if (source)
		objdump_args.push_back("-S");

	bool const whole_image = (start == 0) && (end == ~bfd_vma(0));
	if (!whole_image) {
		ostringstream from;
		ostringstream to;
		from << "--start-address=" << start;
		to << "--stop-address=" << end;
		objdump_args.push_back(from.str());
		objdump_args.push_back(to.str());
	}

	// user supplied objdump arguments come after ours
	for (size_t extra = 0; extra != objdump_params.size(); ++extra)
		objdump_args.push_back(objdump_params[extra]);

	objdump_args.push_back(app_name);

	child_reader reader("objdump", objdump_args);
	if (reader.error()) {
		cerr << failure_banner;
		cerr << reader.error_str() << endl;
		return;
	}

	// used to filter the output by symbol (based on command line options)
	bool do_output = true;
	symbol_entry const * current_symbol = 0;
	string line;
	while (reader.getline(line)) {
		current_symbol = output_objdump_asm_line(current_symbol,
			app_name, line, symbols, do_output);
	}

	// objdump always returns SUCCESS, so what it wrote to stderr is the
	// only failure indication. If objdump's message is cryptic, ours will
	// probably be cryptic too.
	ostringstream std_err;
	ostringstream std_out;
	reader.get_data(std_out, std_err);
	string const diagnostics = std_err.str();
	if (!diagnostics.empty()) {
		cerr << failure_banner;
		cerr << diagnostics << endl;
		return;
	}

	// force the error code to be acquired
	reader.terminate_process();

	// still required: if objdump was stopped by a signal everything above
	// succeeds (signal errors are not reported through stdout/stderr)
	if (reader.error()) {
		cerr << failure_banner;
		cerr << reader.error_str() << endl;
	}
}
/// Disassemble and annotate the selected symbols of @p app_name.
///
/// @param symbols   symbols selected for output
/// @param app_name  image to disassemble
void output_objdump_asm(symbol_collection const & symbols,
	string const & app_name)
{
	// This is only an optimisation. Either objdump disassembles the
	// whole image and the output is filtered by symbol, or objdump is
	// invoked once per symbol with the matching address range. The
	// per-symbol form only pays off for a moderate number of symbols;
	// the threshold below is not known to be optimal but is conservative.
	size_t const max_objdump_exec = 50;

	if (symbols.size() > max_objdump_exec) {
		do_one_output_objdump(symbols, app_name, 0, ~bfd_vma(0));
		return;
	}

	for (symbol_collection::const_iterator sym = symbols.begin();
	     sym != symbols.end(); ++sym) {
		bfd_vma const first = (*sym)->sample.vma;
		bfd_vma const stop = first + (*sym)->size;
		do_one_output_objdump(symbols, app_name, first, stop);
	}
}
bool output_asm(string const & app_name)
{
profile_container::symbol_choice choice;
choice.threshold = options::threshold;
choice.image_name = app_name;
choice.match_image = true;
symbol_collection symbols = samples->select_symbols(choice);
if (!symbols.empty()) {
sort_options options;
options.add_sort_option(sort_options::sample);
options.sort(symbols, false, false);
output_info(cout);
output_objdump_asm(symbols, app_name);
return true;
}
return false;
}
/// Build the annotation prefix for one source line.
///
/// @param filename  debug-info id of the source file
/// @param linenr    line number inside that file
/// @return the formatted counts followed by " :" when the line has
///         samples, annotation_fill otherwise
string const source_line_annotation(debug_name_id filename, size_t linenr)
{
	count_array_t counts = samples->samples_count(filename, linenr);
	if (counts.zero())
		return annotation_fill;

	string annotation = count_str(counts, samples->samples_count());
	// one separator for every event after the first
	for (size_t gap = 1; gap < nr_events; ++gap)
		annotation += " ";
	annotation += " :";
	return annotation;
}
/// Build the trailing symbol comment for one source line.
///
/// @param filename  debug-info id of the source file
/// @param linenr    line number inside that file
/// @return symbol_annotation() of the symbol samples->find_symbol()
///         reports for that file and line
string source_symbol_annotation(debug_name_id filename, size_t linenr)
{
	return symbol_annotation(samples->find_symbol(filename, linenr));
}
/// Write the comment block giving the total samples of one source file,
/// followed by an empty line.
///
/// @param out               stream receiving the comment block
/// @param filename          debug-info id of the source file
/// @param total_file_count  per-event sample counts for the whole file
void output_per_file_info(ostream & out, debug_name_id filename,
			  count_array_t const & total_file_count)
{
	string const totals =
		count_str(total_file_count, samples->samples_count());

	out << begin_comment << '\n';
	out << in_comment << "Total samples for file : "
	    << '"' << debug_names.name(filename) << '"' << '\n';
	out << in_comment << '\n';
	out << in_comment << totals << '\n';
	out << end_comment << '\n';
	out << '\n';
}
/// Describe the samples attributed to line zero of a source file.
///
/// @param filename  debug-info id of the source file
/// @return an empty string when the line-zero annotation holds nothing
///         but blanks, tabs and ':'; otherwise that annotation prefixed
///         with "<credited to line zero> "
string const line0_info(debug_name_id filename)
{
	string annotation = source_line_annotation(filename, 0);

	bool const nothing_credited = trim(annotation, " \t:").empty();
	if (nothing_credited)
		return string();

	return "<credited to line zero> " + annotation;
}
void do_output_one_file(ostream & out, istream & in, debug_name_id filename,
bool header)
{
count_array_t count = samples->samples_count(filename);
if (header) {
output_per_file_info(out, filename, count);
out << line0_info(filename) << '\n';
}
if (in) {
string str;
for (size_t linenr = 1 ; getline(in, str) ; ++linenr) {
out << source_line_annotation(filename, linenr) << str
<< source_symbol_annotation(filename, linenr)
<< '\n';
}
} else {
// FIXME : we have no input file : we just outputfooter
// so on user can known total nr of samples for this source
// later we must add code that iterate through symbol in this
// file to output one annotation for each symbol. To do this we
// need a select_symbol(filename); in profile_container which
// fall back to the implementation in symbol_container
// using a lazilly build symbol_map sorted by filename
// (necessary functors already exist in symbol_functors.h)
}
if (!header) {
output_per_file_info(out, filename, count);
out << line0_info(filename) << '\n';
}
}
/// Output the annotation of one source file, either on stdout or in a
/// file created below output_dir.
///
/// @param in        the opened source file (possibly in a failed state)
/// @param filename  debug-info id of the source file
/// @param source    path of the source file
void output_one_file(istream & in, debug_name_id filename,
		     string const & source)
{
	// no output directory requested: everything goes to stdout
	if (output_dir.empty()) {
		do_output_one_file(cout, in, filename, true);
		return;
	}

	string const target = op_realpath(output_dir + source);

	// Last-ditch safety checks. If the source was found, "source" should
	// already be canonical and cannot escape from the output directory.
	// op_realpath() alone is not enough because it needs the file to
	// exist already. When the source could not be opened a warning has
	// been given before, so do not complain a second time.
	if (target.find("/../") != string::npos) {
		if (in) {
			cerr << "refusing to create non-canonical filename "
			     << target << endl;
		}
		return;
	}

	if (!is_prefix(target, output_dir)) {
		if (in) {
			cerr << "refusing to create file " << target
			     << " outside of output directory " << output_dir
			     << endl;
		}
		return;
	}

	if (is_files_identical(target, source)) {
		cerr << "input and output files are identical: "
		     << target << endl;
		return;
	}

	if (create_path(target.c_str())) {
		cerr << "unable to create file: "
		     << '"' << op_dirname(target) << '"' << endl;
		return;
	}

	ofstream out(target.c_str());
	if (!out) {
		cerr << "unable to open output file "
		     << '"' << target << '"' << endl;
		return;
	}

	do_output_one_file(out, in, filename, false);
	output_info(out);
}
/// Locate a source file named by the debug info; the name may be relative.
///
/// @param filename_id  debug-info id of the source file
/// @return an empty string when the debug info carries no name; the first
///         readable candidate found under search_dirs; otherwise
///         op_realpath() of the name as recorded in the debug info
string const locate_source_file(debug_name_id filename_id)
{
	string const recorded = debug_names.name(filename_id);
	if (recorded.empty())
		return recorded;

	string candidate = recorded;

	// Allow absolute paths to be relocated to a different directory:
	// strip the first matching base directory.
	if (candidate[0] == '/') {
		for (size_t i = 0; i < base_dirs.size(); ++i) {
			string const base = op_realpath(base_dirs[i]);
			if (is_prefix(candidate, base)) {
				candidate = candidate.substr(base.length());
				break;
			}
		}
	}

	for (size_t i = 0; i < search_dirs.size(); ++i) {
		string const absfile =
			op_realpath(search_dirs[i] + "/" + candidate);
		if (op_file_readable(absfile))
			return absfile;
	}

	// Neither a relocated absolute file nor a relative file was found:
	// assume the recorded name is correct, allowing for the possibility
	// that it is relative to the cwd.
	return op_realpath(recorded);
}
/// Output the annotated source of every selected file accepted by
/// @p filter.
///
/// @param filter  only files whose located path matches it are output
void output_source(path_filter const & filter)
{
	// with a single output (stdout) the run description is written once,
	// up front
	if (output_dir.empty())
		output_info(cout);

	vector<debug_name_id> const filenames =
		samples->select_filename(options::threshold);

	typedef vector<debug_name_id>::const_iterator id_iterator;
	for (id_iterator id = filenames.begin(); id != filenames.end(); ++id) {
		string const path = locate_source_file(*id);
		if (!filter.match(path))
			continue;

		ifstream in(path.c_str());

		// An empty filename is common when debug info is lacking (eg
		// the _init function), so such entries are skipped without a
		// warning. The "no debug info at all" case has already been
		// checked.
		if (path.empty())
			continue;

		if (!in) {
			cerr << "opannotate (warning): unable to open for "
				"reading: " << path << endl;
		}

		output_one_file(in, *id, path);
	}
}
/// Produce the requested annotation, assembly or source.
///
/// When an output directory is set it is created and output_dir is
/// rewritten as an absolute path ending with '/'.
///
/// @param images  images to annotate in assembly mode
/// @return false when the output directory cannot be created or resolves
///         to "/", true otherwise
bool annotate_source(list<string> const & images)
{
	annotation_fill = get_annotation_fill();

	if (!output_dir.empty()) {
		if (create_path(output_dir.c_str())) {
			cerr << "unable to create " << output_dir
			     << " directory: " << endl;
			return false;
		}

		// Make sure we have an absolute path, ending with a slash.
		output_dir = op_realpath(output_dir);
		if (!output_dir.empty() && *output_dir.rbegin() != '/')
			output_dir += '/';

		// Don't let the user stomp on their sources
		if (output_dir == "/") {
			cerr << "Output path of / would over-write the "
				"source files" << endl;
			return false;
		}
	}

	if (!assembly) {
		output_source(file_filter);
		return true;
	}

	bool some_output = false;
	for (list<string>::const_iterator image = images.begin();
	     image != images.end(); ++image) {
		// every image must be processed, whatever the earlier results
		bool const produced = output_asm(*image);
		some_output = some_output || produced;
	}

	if (!some_output) {
		// This is the only case to care about, since the selected
		// image set is known not to be empty.
		cerr << "selected image set doesn't contain any of "
		     << "the selected symbol\n";
	}

	return true;
}
/// Entry point of the tool once the command line has been parsed into
/// @p spec: load the samples of every selected image and write the
/// annotation.
///
/// Exits the process with EXIT_FAILURE when no image has debug
/// information and --assembly was not requested.
///
/// @param spec  parsed profile specification
/// @return 0 on success, EXIT_FAILURE when annotate_source() reports a
///         failure (the value is handed to run_pp_tool() by main() --
///         presumably it becomes the exit status; confirm in
///         run_pp_tool())
int opannotate(options::spec const & spec)
{
	handle_options(spec);

	nr_events = classes.v.size();

	samples.reset(new profile_container(true, true));

	list<string> images;

	list<inverted_profile> iprofiles
		= invert_profiles(options::archive_path, classes,
				  options::extra_found_images);

	report_image_errors(iprofiles);

	list<inverted_profile>::iterator it = iprofiles.begin();
	list<inverted_profile>::iterator const end = iprofiles.end();

	// becomes true as soon as one image reports debug information
	bool debug_info = false;
	for (; it != end; ++it) {
		bool image_has_debug_info = false;
		populate_for_image(options::archive_path, *samples, *it,
				   options::symbol_filter,
				   &image_has_debug_info);
		images.push_back(it->image);
		if (image_has_debug_info)
			debug_info = true;
	}

	if (!debug_info && !options::assembly) {
		cerr << "no debug information available for any binary "
		     << "selected and --assembly not requested\n";
		exit(EXIT_FAILURE);
	}

	// annotate_source() has already printed the reason when it fails;
	// do not report success in that case.
	return annotate_source(images) ? 0 : EXIT_FAILURE;
}
} // anonymous namespace
/// Program entry: record the command line, then hand control to
/// run_pp_tool() with opannotate() as the tool body.
int main(int argc, char const * argv[])
{
	// remember the invocation, it is printed in the file headers later;
	// every argument, the last included, is followed by one space
	for (int arg = 0; arg != argc; ++arg)
		cmdline.append(argv[arg]).append(" ");

	return run_pp_tool(argc, argv, opannotate);
}