/**
 * @file opannotate.cpp
 * Implement opannotate utility
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */
11
12#include <iostream>
13#include <sstream>
14#include <algorithm>
15#include <iomanip>
16#include <fstream>
17#include <utility>
18
19#include "op_exception.h"
20#include "op_header.h"
21#include "profile.h"
22#include "populate.h"
23#include "op_sample_file.h"
24#include "cverb.h"
25#include "string_manip.h"
26#include "demangle_symbol.h"
27#include "child_reader.h"
28#include "op_file.h"
29#include "file_manip.h"
30#include "arrange_profiles.h"
31#include "opannotate_options.h"
32#include "profile_container.h"
33#include "symbol_sort.h"
34#include "image_errors.h"
35
36using namespace std;
37using namespace options;
38
39namespace {
40
41size_t nr_events;
42
43scoped_ptr<profile_container> samples;
44
45/// how opannotate was invoked
46string cmdline;
47
48/// empty annotation fill string
49string annotation_fill;
50
51/// string used as start / end comment to annotate source
52string const begin_comment("/* ");
53string const in_comment(" * ");
54string const end_comment(" */");
55
56/// field width for the sample count
57unsigned int const count_width = 6;
58
59string get_annotation_fill()
60{
61 string str;
62
63 for (size_t i = 0; i < nr_events; ++i) {
64 str += string(count_width, ' ') + ' ';
65 str += string(percent_width, ' ');
66 }
67
68 for (size_t i = 1; i < nr_events; ++i) {
69 str += " ";
70 }
71
72 str += " :";
73 return str;
74}
75
76
77symbol_entry const * find_symbol(string const & image_name,
78 string const & str_vma)
79{
80 // do not use the bfd equivalent:
81 // - it does not skip space at begin
82 // - we does not need cross architecture compile so the native
83 // strtoull must work, assuming unsigned long long can contain a vma
84 // and on 32/64 bits box bfd_vma is 64 bits
85 bfd_vma vma = strtoull(str_vma.c_str(), NULL, 16);
86
87 return samples->find_symbol(image_name, vma);
88}
89
90
91void output_info(ostream & out)
92{
93 out << begin_comment << '\n';
94
95 out << in_comment << "Command line: " << cmdline << '\n'
96 << in_comment << '\n';
97
98 out << in_comment << "Interpretation of command line:" << '\n';
99
100 if (!assembly) {
101 out << in_comment
102 << "Output annotated source file with samples" << '\n';
103
104 if (options::threshold != 0) {
105 out << in_comment
106 << "Output files where samples count reach "
107 << options::threshold << "% of the samples\n";
108 } else {
109 out << in_comment << "Output all files" << '\n';
110 }
111 } else {
112 out << in_comment
113 << "Output annotated assembly listing with samples"
114 << '\n';
115
116 if (!objdump_params.empty()) {
117 out << in_comment << "Passing the following "
118 "additional arguments to objdump ; \"";
119 for (size_t i = 0 ; i < objdump_params.size() ; ++i)
120 out << objdump_params[i] << " ";
121 out << "\"" << '\n';
122 }
123 }
124
125 out << in_comment << '\n';
126
127 out << in_comment << classes.cpuinfo << endl;
128 if (!classes.event.empty())
129 out << in_comment << classes.event << endl;
130
131 for (size_t i = 0; i < classes.v.size(); ++i)
132 out << in_comment << classes.v[i].longname << endl;
133
134 out << end_comment << '\n';
135}
136
137
138string count_str(count_array_t const & count,
139 count_array_t const & total)
140{
141 ostringstream os;
142 for (size_t i = 0; i < nr_events; ++i) {
143 os << setw(count_width) << count[i] << ' ';
144
145 os << format_percent(op_ratio(count[i], total[i]) * 100.0,
146 percent_int_width, percent_fract_width);
147 }
148 return os.str();
149}
150
151
152string asm_line_annotation(symbol_entry const * last_symbol,
153 string const & value)
154{
155 // do not use the bfd equivalent:
156 // - it does not skip space at begin
157 // - we does not need cross architecture compile so the native
158 // strtoull must work, assuming unsigned long long can contain a vma
159 // and on 32/64 bits box bfd_vma is 64 bits
160 // gcc 2.91.66 workaround
161 bfd_vma vma = 0;
162 vma = strtoull(value.c_str(), NULL, 16);
163
164 string str;
165
166 sample_entry const * sample = samples->find_sample(last_symbol, vma);
167 if (sample) {
168 str += count_str(sample->counts, samples->samples_count());
169 for (size_t i = 1; i < nr_events; ++i)
170 str += " ";
171 str += " :";
172 } else {
173 str = annotation_fill;
174 }
175
176 return str;
177}
178
179
180string symbol_annotation(symbol_entry const * symbol)
181{
182 if (!symbol)
183 return string();
184
185 string annot = count_str(symbol->sample.counts,
186 samples->samples_count());
187 if (annot.empty())
188 return string();
189
190 string const & symname = symbol_names.demangle(symbol->name);
191
192 string str = " ";
193 str += begin_comment + symname + " total: ";
194 str += count_str(symbol->sample.counts, samples->samples_count());
195 str += end_comment;
196 return str;
197}
198
199
/// Tell whether @p str is a symbol line in objdump formatting.
///
/// Symbol lines have the form "08030434 <symbol_name>:". We need to be
/// strict here to avoid interpreting a source line as a symbol line.
///
/// @param str  the line to check
/// @param pos  index of the character following the address; any value is
///             accepted, positions at or past the end yield false
/// @return true when " <" is found at @p pos and the line ends with ':'
bool is_symbol_line(std::string const & str, std::string::size_type pos)
{
	std::string::size_type const len = str.length();

	// both str[pos] and str[pos + 1] must exist; checking pos first
	// also keeps pos + 1 from wrapping around
	if (pos >= len || pos + 1 >= len)
		return false;

	if (str[pos] != ' ' || str[pos + 1] != '<')
		return false;

	// len >= 2 here, so the last character can be read safely
	return str[len - 1] == ':';
}
210
211
212symbol_entry const * output_objdump_asm_line(symbol_entry const * last_symbol,
213 string const & app_name, string const & str,
214 symbol_collection const & symbols,
215 bool & do_output)
216{
217 // output of objdump is a human readable form and can contain some
218 // ambiguity so this code is dirty. It is also optimized a little bit
219 // so it is difficult to simplify it without breaking something ...
220
221 // line of interest are: "[:space:]*[:xdigit:]?[ :]", the last char of
222 // this regexp dis-ambiguate between a symbol line and an asm line. If
223 // source contain line of this form an ambiguity occur and we rely on
224 // the robustness of this code.
225
226 size_t pos = 0;
227 while (pos < str.length() && isspace(str[pos]))
228 ++pos;
229
230 if (pos == str.length() || !isxdigit(str[pos])) {
231 if (do_output) {
232 cout << annotation_fill << str << '\n';
233 return last_symbol;
234 }
235 }
236
237 while (pos < str.length() && isxdigit(str[pos]))
238 ++pos;
239
240 if (pos == str.length() || (!isspace(str[pos]) && str[pos] != ':')) {
241 if (do_output) {
242 cout << annotation_fill << str << '\n';
243 return last_symbol;
244 }
245 }
246
247 if (is_symbol_line(str, pos)) {
248 last_symbol = find_symbol(app_name, str);
249
250 // ! complexity: linear in number of symbol must use sorted
251 // by address vector and lower_bound ?
252 // Note this use a pointer comparison. It work because symbols
253 // pointer are unique
254 if (find(symbols.begin(), symbols.end(), last_symbol)
255 != symbols.end()) {
256 do_output = true;
257 } else {
258 do_output = false;
259 }
260
261 if (do_output)
262 cout << str << symbol_annotation(last_symbol) << '\n';
263
264 } else {
265 // not a symbol, probably an asm line.
266 if (do_output)
267 cout << asm_line_annotation(last_symbol, str)
268 << str << '\n';
269 }
270
271 return last_symbol;
272}
273
274
275void do_one_output_objdump(symbol_collection const & symbols,
276 string const & app_name, bfd_vma start, bfd_vma end)
277{
278 vector<string> args;
279
280 args.push_back("-d");
281 args.push_back("--no-show-raw-insn");
282 if (source)
283 args.push_back("-S");
284
285 if (start || end != ~(bfd_vma)0) {
286 ostringstream arg1, arg2;
287 arg1 << "--start-address=" << start;
288 arg2 << "--stop-address=" << end;
289 args.push_back(arg1.str());
290 args.push_back(arg2.str());
291 }
292
293 if (!objdump_params.empty()) {
294 for (size_t i = 0 ; i < objdump_params.size() ; ++i)
295 args.push_back(objdump_params[i]);
296 }
297
298 args.push_back(app_name);
299 child_reader reader("objdump", args);
300 if (reader.error()) {
301 cerr << "An error occur during the execution of objdump:\n\n";
302 cerr << reader.error_str() << endl;
303 return;
304 }
305
306 // to filter output of symbols (filter based on command line options)
307 bool do_output = true;
308
309 symbol_entry const * last_symbol = 0;
310 string str;
311 while (reader.getline(str)) {
312 last_symbol = output_objdump_asm_line(last_symbol, app_name,
313 str, symbols, do_output);
314 }
315
316 // objdump always returns SUCCESS so we must rely on the stderr state
317 // of objdump. If objdump error message is cryptic our own error
318 // message will be probably also cryptic
319 ostringstream std_err;
320 ostringstream std_out;
321 reader.get_data(std_out, std_err);
322 if (std_err.str().length()) {
323 cerr << "An error occur during the execution of objdump:\n\n";
324 cerr << std_err.str() << endl;
325 return ;
326 }
327
328 // force error code to be acquired
329 reader.terminate_process();
330
331 // required because if objdump stop by signal all above things suceeed
332 // (signal error message are not output through stdout/stderr)
333 if (reader.error()) {
334 cerr << "An error occur during the execution of objdump:\n\n";
335 cerr << reader.error_str() << endl;
336 return;
337 }
338}
339
340
341void output_objdump_asm(symbol_collection const & symbols,
342 string const & app_name)
343{
344 // this is only an optimisation, we can either filter output by
345 // directly calling objdump and rely on the symbol filtering or
346 // we can call objdump with the right parameter to just disassemble
347 // the needed part. This is a real win only when calling objdump
348 // a medium number of times, I dunno if the used threshold is optimal
349 // but it is a conservative value.
350 size_t const max_objdump_exec = 50;
351 if (symbols.size() <= max_objdump_exec) {
352 symbol_collection::const_iterator cit = symbols.begin();
353 symbol_collection::const_iterator end = symbols.end();
354 for (; cit != end; ++cit) {
355 bfd_vma start = (*cit)->sample.vma;
356 bfd_vma end = start + (*cit)->size;
357 do_one_output_objdump(symbols, app_name, start, end);
358 }
359 } else {
360 do_one_output_objdump(symbols, app_name, 0, ~bfd_vma(0));
361 }
362}
363
364
365bool output_asm(string const & app_name)
366{
367 profile_container::symbol_choice choice;
368 choice.threshold = options::threshold;
369 choice.image_name = app_name;
370 choice.match_image = true;
371 symbol_collection symbols = samples->select_symbols(choice);
372
373 if (!symbols.empty()) {
374 sort_options options;
375 options.add_sort_option(sort_options::sample);
376 options.sort(symbols, false, false);
377
378 output_info(cout);
379
380 output_objdump_asm(symbols, app_name);
381
382 return true;
383 }
384
385 return false;
386}
387
388
389string const source_line_annotation(debug_name_id filename, size_t linenr)
390{
391 string str;
392
393 count_array_t counts = samples->samples_count(filename, linenr);
394 if (!counts.zero()) {
395 str += count_str(counts, samples->samples_count());
396 for (size_t i = 1; i < nr_events; ++i)
397 str += " ";
398 str += " :";
399 } else {
400 str = annotation_fill;
401 }
402
403 return str;
404}
405
406
407string source_symbol_annotation(debug_name_id filename, size_t linenr)
408{
409 symbol_entry const * symbol = samples->find_symbol(filename, linenr);
410
411 return symbol_annotation(symbol);
412}
413
414
415void output_per_file_info(ostream & out, debug_name_id filename,
416 count_array_t const & total_file_count)
417{
418 out << begin_comment << '\n'
419 << in_comment << "Total samples for file : "
420 << '"' << debug_names.name(filename) << '"'
421 << '\n';
422 out << in_comment << '\n' << in_comment
423 << count_str(total_file_count, samples->samples_count())
424 << '\n';
425 out << end_comment << '\n' << '\n';
426}
427
428
429string const line0_info(debug_name_id filename)
430{
431 string annotation = source_line_annotation(filename, 0);
432 if (trim(annotation, " \t:").empty())
433 return string();
434
435 string str = "<credited to line zero> ";
436 str += annotation;
437 return str;
438}
439
440
441void do_output_one_file(ostream & out, istream & in, debug_name_id filename,
442 bool header)
443{
444 count_array_t count = samples->samples_count(filename);
445
446 if (header) {
447 output_per_file_info(out, filename, count);
448 out << line0_info(filename) << '\n';
449 }
450
451
452 if (in) {
453 string str;
454
455 for (size_t linenr = 1 ; getline(in, str) ; ++linenr) {
456 out << source_line_annotation(filename, linenr) << str
457 << source_symbol_annotation(filename, linenr)
458 << '\n';
459 }
460
461 } else {
462 // FIXME : we have no input file : we just outputfooter
463 // so on user can known total nr of samples for this source
464 // later we must add code that iterate through symbol in this
465 // file to output one annotation for each symbol. To do this we
466 // need a select_symbol(filename); in profile_container which
467 // fall back to the implementation in symbol_container
468 // using a lazilly build symbol_map sorted by filename
469 // (necessary functors already exist in symbol_functors.h)
470 }
471
472 if (!header) {
473 output_per_file_info(out, filename, count);
474 out << line0_info(filename) << '\n';
475 }
476}
477
478
479void output_one_file(istream & in, debug_name_id filename,
480 string const & source)
481{
482 if (output_dir.empty()) {
483 do_output_one_file(cout, in, filename, true);
484 return;
485 }
486
487 string const out_file = op_realpath(output_dir + source);
488
489 /* Just because you're paranoid doesn't mean they're not out to
490 * get you ...
491 *
492 * This is just a lame final safety check. If we found the
493 * source, then "source" should be canonical already, and
494 * can't escape from the output dir. We can't use op_realpath()
495 * alone as that needs the file to exist already.
496 *
497 * Let's not complain again if we couldn't find the file anyway.
498 */
499 if (out_file.find("/../") != string::npos) {
500 if (in) {
501 cerr << "refusing to create non-canonical filename "
502 << out_file << endl;
503 }
504 return;
505 } else if (!is_prefix(out_file, output_dir)) {
506 if (in) {
507 cerr << "refusing to create file " << out_file
508 << " outside of output directory " << output_dir
509 << endl;
510 }
511 return;
512 }
513
514 if (is_files_identical(out_file, source)) {
515 cerr << "input and output files are identical: "
516 << out_file << endl;
517 return;
518 }
519
520 if (create_path(out_file.c_str())) {
521 cerr << "unable to create file: "
522 << '"' << op_dirname(out_file) << '"' << endl;
523 return;
524 }
525
526 ofstream out(out_file.c_str());
527 if (!out) {
528 cerr << "unable to open output file "
529 << '"' << out_file << '"' << endl;
530 } else {
531 do_output_one_file(out, in, filename, false);
532 output_info(out);
533 }
534}
535
536
537/* Locate a source file from debug info, which may be relative */
538string const locate_source_file(debug_name_id filename_id)
539{
540 string const origfile = debug_names.name(filename_id);
541 string file = origfile;
542
543 if (file.empty())
544 return file;
545
546 /* Allow absolute paths to be relocated to a different directory */
547 if (file[0] == '/') {
548 vector<string>::const_iterator cit = base_dirs.begin();
549 vector<string>::const_iterator end = base_dirs.end();
550 for (; cit != end; ++cit) {
551 string path = op_realpath(*cit);
552
553 if (is_prefix(file, path)) {
554 file = file.substr(path.length());
555 break;
556 }
557 }
558 }
559
560 vector<string>::const_iterator cit = search_dirs.begin();
561 vector<string>::const_iterator end = search_dirs.end();
562
563 for (; cit != end; ++cit) {
564 string const absfile = op_realpath(*cit + "/" + file);
565
566 if (op_file_readable(absfile)) {
567 return absfile;
568 }
569 }
570
571 /* We didn't find a relocated absolute file, or a relative file,
572 * assume the original is correct, accounting for the
573 * possibility it's relative the cwd
574 */
575 return op_realpath(origfile);
576}
577
578
579void output_source(path_filter const & filter)
580{
581 bool const separate_file = !output_dir.empty();
582
583 if (!separate_file)
584 output_info(cout);
585
586 vector<debug_name_id> filenames =
587 samples->select_filename(options::threshold);
588
589 for (size_t i = 0 ; i < filenames.size() ; ++i) {
590 string const & source = locate_source_file(filenames[i]);
591
592 if (!filter.match(source))
593 continue;
594
595 ifstream in(source.c_str());
596
597 // it is common to have empty filename due to the lack
598 // of debug info (eg _init function) so warn only
599 // if the filename is non empty. The case: no debug
600 // info at all has already been checked.
601 if ((!in) && source.length()) {
602 cerr << "opannotate (warning): unable to open for "
603 "reading: " << source << endl;
604 }
605
606 if (source.length()) {
607 output_one_file(in, filenames[i], source);
608 }
609 }
610}
611
612
613bool annotate_source(list<string> const & images)
614{
615 annotation_fill = get_annotation_fill();
616
617 if (!output_dir.empty()) {
618
619 if (create_path(output_dir.c_str())) {
620 cerr << "unable to create " << output_dir
621 << " directory: " << endl;
622 return false;
623 }
624
625 // Make sure we have an absolute path.
626 output_dir = op_realpath(output_dir);
627 if (output_dir.length() &&
628 output_dir[output_dir.length() - 1] != '/')
629 output_dir += '/';
630
631 /* Don't let the user stomp on their sources */
632 if (output_dir == "/") {
633 cerr << "Output path of / would over-write the "
634 "source files" << endl;
635 return false;
636 }
637 }
638
639 if (assembly) {
640 bool some_output = false;
641
642 list<string>::const_iterator it = images.begin();
643 list<string>::const_iterator const end = images.end();
644
645 for (; it != end; ++it) {
646 if (output_asm(*it)) {
647 some_output = true;
648 }
649 }
650
651 if (!some_output) {
652 // It's the only case we must care since we know the
653 // selected image set is not empty
654 cerr << "selected image set doesn't contain any of "
655 << "the selected symbol\n";
656 }
657 } else {
658 output_source(file_filter);
659 }
660
661 return true;
662}
663
664
665int opannotate(options::spec const & spec)
666{
667 handle_options(spec);
668
669 nr_events = classes.v.size();
670
671 samples.reset(new profile_container(true, true));
672
673 list<string> images;
674
675 list<inverted_profile> iprofiles
676 = invert_profiles(options::archive_path, classes,
677 options::extra_found_images);
678
679 report_image_errors(iprofiles);
680
681 list<inverted_profile>::iterator it = iprofiles.begin();
682 list<inverted_profile>::iterator const end = iprofiles.end();
683
684 bool debug_info = false;
685 for (; it != end; ++it) {
686 bool tmp = false;
687 populate_for_image(options::archive_path, *samples, *it,
688 options::symbol_filter, &tmp);
689 images.push_back(it->image);
690 if (tmp)
691 debug_info = true;
692 }
693
694 if (!debug_info && !options::assembly) {
695 cerr << "no debug information available for any binary "
696 << "selected and --assembly not requested\n";
697 exit(EXIT_FAILURE);
698 }
699
700 annotate_source(images);
701
702 return 0;
703}
704
705} // anonymous namespace
706
707
708int main(int argc, char const * argv[])
709{
710 // set the invocation, for the file headers later
711 for (int i = 0 ; i < argc ; ++i)
712 cmdline += string(argv[i]) + " ";
713
714 return run_pp_tool(argc, argv, opannotate);
715}