Mike Dodd | 8cfa702 | 2010-11-17 11:12:26 -0800 | [diff] [blame] | 1 | /** |
| 2 | * @file profile_spec.cpp |
| 3 | * Contains a PP profile specification |
| 4 | * |
| 5 | * @remark Copyright 2003 OProfile authors |
| 6 | * @remark Read the file COPYING |
| 7 | * |
| 8 | * @author Philippe Elie |
| 9 | */ |
| 10 | |
| 11 | #include <algorithm> |
| 12 | #include <set> |
| 13 | #include <sstream> |
| 14 | #include <iterator> |
| 15 | #include <iostream> |
| 16 | #include <dirent.h> |
| 17 | |
| 18 | #include "file_manip.h" |
| 19 | #include "op_config.h" |
| 20 | #include "profile_spec.h" |
| 21 | #include "string_manip.h" |
| 22 | #include "glob_filter.h" |
| 23 | #include "locate_images.h" |
| 24 | #include "op_exception.h" |
| 25 | #include "op_header.h" |
| 26 | #include "op_fileio.h" |
| 27 | |
| 28 | using namespace std; |
| 29 | |
| 30 | namespace { |
| 31 | |
| 32 | // PP:3.7, full path, or relative path. If we can't find it, |
| 33 | // we should maintain the original to maintain the wordexp etc. |
| 34 | string const fixup_image_spec(string const & str, extra_images const & extra) |
| 35 | { |
| 36 | // On error find_image_path() return str, so if an occur we will |
| 37 | // use the provided image_name not the fixed one. |
| 38 | image_error error; |
| 39 | return extra.find_image_path(str, error, true); |
| 40 | } |
| 41 | |
| 42 | void fixup_image_spec(vector<string> & images, extra_images const & extra) |
| 43 | { |
| 44 | vector<string>::iterator it = images.begin(); |
| 45 | vector<string>::iterator const end = images.end(); |
| 46 | |
| 47 | for (; it != end; ++it) |
| 48 | *it = fixup_image_spec(*it, extra); |
| 49 | } |
| 50 | |
| 51 | } // anon namespace |
| 52 | |
| 53 | |
| 54 | profile_spec::profile_spec() |
| 55 | : |
| 56 | extra_found_images() |
| 57 | { |
| 58 | parse_table["archive"] = &profile_spec::parse_archive_path; |
| 59 | parse_table["session"] = &profile_spec::parse_session; |
| 60 | parse_table["session-exclude"] = |
| 61 | &profile_spec::parse_session_exclude; |
| 62 | parse_table["image"] = &profile_spec::parse_image; |
| 63 | parse_table["image-exclude"] = &profile_spec::parse_image_exclude; |
| 64 | parse_table["lib-image"] = &profile_spec::parse_lib_image; |
| 65 | parse_table["event"] = &profile_spec::parse_event; |
| 66 | parse_table["count"] = &profile_spec::parse_count; |
| 67 | parse_table["unit-mask"] = &profile_spec::parse_unitmask; |
| 68 | parse_table["tid"] = &profile_spec::parse_tid; |
| 69 | parse_table["tgid"] = &profile_spec::parse_tgid; |
| 70 | parse_table["cpu"] = &profile_spec::parse_cpu; |
| 71 | } |
| 72 | |
| 73 | |
| 74 | void profile_spec::parse(string const & tag_value) |
| 75 | { |
| 76 | string value; |
| 77 | action_t action = get_handler(tag_value, value); |
| 78 | if (!action) { |
| 79 | throw invalid_argument("profile_spec::parse(): not " |
| 80 | "a valid tag \"" + tag_value + "\""); |
| 81 | } |
| 82 | |
| 83 | (this->*action)(value); |
| 84 | } |
| 85 | |
| 86 | |
| 87 | bool profile_spec::is_valid_tag(string const & tag_value) |
| 88 | { |
| 89 | string value; |
| 90 | return get_handler(tag_value, value); |
| 91 | } |
| 92 | |
| 93 | |
| 94 | void profile_spec::set_image_or_lib_name(string const & str) |
| 95 | { |
| 96 | /* FIXME: what does spec say about this being allowed to be |
| 97 | * a comma list or not ? */ |
| 98 | image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images)); |
| 99 | } |
| 100 | |
| 101 | |
| 102 | void profile_spec::parse_archive_path(string const & str) |
| 103 | { |
| 104 | archive_path = op_realpath(str); |
| 105 | } |
| 106 | |
| 107 | |
| 108 | string profile_spec::get_archive_path() const |
| 109 | { |
| 110 | return archive_path; |
| 111 | } |
| 112 | |
| 113 | |
| 114 | void profile_spec::parse_session(string const & str) |
| 115 | { |
| 116 | session = separate_token(str, ','); |
| 117 | } |
| 118 | |
| 119 | |
| 120 | void profile_spec::parse_session_exclude(string const & str) |
| 121 | { |
| 122 | session_exclude = separate_token(str, ','); |
| 123 | } |
| 124 | |
| 125 | |
| 126 | void profile_spec::parse_image(string const & str) |
| 127 | { |
| 128 | image = separate_token(str, ','); |
| 129 | fixup_image_spec(image, extra_found_images); |
| 130 | } |
| 131 | |
| 132 | |
| 133 | void profile_spec::parse_image_exclude(string const & str) |
| 134 | { |
| 135 | image_exclude = separate_token(str, ','); |
| 136 | fixup_image_spec(image_exclude, extra_found_images); |
| 137 | } |
| 138 | |
| 139 | |
| 140 | void profile_spec::parse_lib_image(string const & str) |
| 141 | { |
| 142 | lib_image = separate_token(str, ','); |
| 143 | fixup_image_spec(lib_image, extra_found_images); |
| 144 | } |
| 145 | |
| 146 | |
| 147 | void profile_spec::parse_event(string const & str) |
| 148 | { |
| 149 | event.set(str); |
| 150 | } |
| 151 | |
| 152 | |
| 153 | void profile_spec::parse_count(string const & str) |
| 154 | { |
| 155 | count.set(str); |
| 156 | } |
| 157 | |
| 158 | |
| 159 | void profile_spec::parse_unitmask(string const & str) |
| 160 | { |
| 161 | unitmask.set(str); |
| 162 | } |
| 163 | |
| 164 | |
| 165 | void profile_spec::parse_tid(string const & str) |
| 166 | { |
| 167 | tid.set(str); |
| 168 | } |
| 169 | |
| 170 | |
| 171 | void profile_spec::parse_tgid(string const & str) |
| 172 | { |
| 173 | tgid.set(str); |
| 174 | } |
| 175 | |
| 176 | |
| 177 | void profile_spec::parse_cpu(string const & str) |
| 178 | { |
| 179 | cpu.set(str); |
| 180 | } |
| 181 | |
| 182 | |
| 183 | profile_spec::action_t |
| 184 | profile_spec::get_handler(string const & tag_value, string & value) |
| 185 | { |
| 186 | string::size_type pos = tag_value.find_first_of(':'); |
| 187 | if (pos == string::npos) |
| 188 | return 0; |
| 189 | |
| 190 | string tag(tag_value.substr(0, pos)); |
| 191 | value = tag_value.substr(pos + 1); |
| 192 | |
| 193 | parse_table_t::const_iterator it = parse_table.find(tag); |
| 194 | if (it == parse_table.end()) |
| 195 | return 0; |
| 196 | |
| 197 | return it->second; |
| 198 | } |
| 199 | |
| 200 | |
| 201 | namespace { |
| 202 | |
| 203 | /// return true if the value from the profile spec may match the comma |
| 204 | /// list |
| 205 | template<typename T> |
| 206 | bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value) |
| 207 | { |
| 208 | // if the profile spec is "all" we match the sample file |
| 209 | if (!cl.is_set()) |
| 210 | return true; |
| 211 | |
| 212 | // an "all" sample file should never match specified profile |
| 213 | // spec values |
| 214 | if (!value.is_set()) |
| 215 | return false; |
| 216 | |
| 217 | // now match each profile spec value against the sample file |
| 218 | return cl.match(value.value()); |
| 219 | } |
| 220 | |
| 221 | } |
| 222 | |
| 223 | |
| 224 | bool profile_spec::match(filename_spec const & spec) const |
| 225 | { |
| 226 | bool matched_by_image_or_lib_image = false; |
| 227 | |
| 228 | // We need the true image name not the one based on the sample |
| 229 | // filename for the benefit of module which have /oprofile in their |
| 230 | // sample filename. This allow to specify profile spec based on the |
| 231 | // real name of the image, e.g. 'binary:*oprofile.ko' |
| 232 | string simage = fixup_image_spec(spec.image, extra_found_images); |
| 233 | string slib_image = fixup_image_spec(spec.lib_image, |
| 234 | extra_found_images); |
| 235 | |
| 236 | // PP:3.19 |
| 237 | if (!image_or_lib_image.empty()) { |
| 238 | glob_filter filter(image_or_lib_image, image_exclude); |
| 239 | if (filter.match(simage) || filter.match(slib_image)) |
| 240 | matched_by_image_or_lib_image = true; |
| 241 | } |
| 242 | |
| 243 | if (!matched_by_image_or_lib_image) { |
| 244 | // PP:3.7 3.8 |
| 245 | if (!image.empty()) { |
| 246 | glob_filter filter(image, image_exclude); |
| 247 | if (!filter.match(simage)) |
| 248 | return false; |
| 249 | } else if (!image_or_lib_image.empty()) { |
| 250 | // image.empty() means match all except if user |
| 251 | // specified image_or_lib_image |
| 252 | return false; |
| 253 | } |
| 254 | |
| 255 | // PP:3.9 3.10 |
| 256 | if (!lib_image.empty()) { |
| 257 | glob_filter filter(lib_image, image_exclude); |
| 258 | if (!filter.match(slib_image)) |
| 259 | return false; |
| 260 | } else if (image.empty() && !image_or_lib_image.empty()) { |
| 261 | // lib_image empty means match all except if user |
| 262 | // specified image_or_lib_image *or* we already |
| 263 | // matched this spec through image |
| 264 | return false; |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | if (!matched_by_image_or_lib_image) { |
| 269 | // if we don't match by image_or_lib_image we must try to |
| 270 | // exclude from spec, exclusion from image_or_lib_image has |
| 271 | // been handled above |
| 272 | vector<string> empty; |
| 273 | glob_filter filter(empty, image_exclude); |
| 274 | if (!filter.match(simage)) |
| 275 | return false; |
| 276 | if (!spec.lib_image.empty() && !filter.match(slib_image)) |
| 277 | return false; |
| 278 | } |
| 279 | |
| 280 | if (!event.match(spec.event)) |
| 281 | return false; |
| 282 | |
| 283 | if (!count.match(spec.count)) |
| 284 | return false; |
| 285 | |
| 286 | if (!unitmask.match(spec.unitmask)) |
| 287 | return false; |
| 288 | |
| 289 | if (!comma_match(cpu, spec.cpu)) |
| 290 | return false; |
| 291 | |
| 292 | if (!comma_match(tid, spec.tid)) |
| 293 | return false; |
| 294 | |
| 295 | if (!comma_match(tgid, spec.tgid)) |
| 296 | return false; |
| 297 | |
| 298 | return true; |
| 299 | } |
| 300 | |
| 301 | |
| 302 | profile_spec profile_spec::create(list<string> const & args, |
| 303 | vector<string> const & image_path, |
| 304 | string const & root_path) |
| 305 | { |
| 306 | profile_spec spec; |
| 307 | set<string> tag_seen; |
| 308 | vector<string> temp_image_or_lib; |
| 309 | |
| 310 | list<string>::const_iterator it = args.begin(); |
| 311 | list<string>::const_iterator end = args.end(); |
| 312 | |
| 313 | for (; it != end; ++it) { |
| 314 | if (spec.is_valid_tag(*it)) { |
| 315 | if (tag_seen.find(*it) != tag_seen.end()) { |
| 316 | throw op_runtime_error("tag specified " |
| 317 | "more than once: " + *it); |
| 318 | } |
| 319 | tag_seen.insert(*it); |
| 320 | spec.parse(*it); |
| 321 | } else { |
| 322 | string const file = op_realpath(*it); |
| 323 | temp_image_or_lib.push_back(file); |
| 324 | } |
| 325 | } |
| 326 | |
| 327 | // PP:3.5 no session given means use the current session. |
| 328 | if (spec.session.empty()) |
| 329 | spec.session.push_back("current"); |
| 330 | |
| 331 | bool ok = true; |
| 332 | vector<string>::const_iterator ip_it = image_path.begin(); |
| 333 | for ( ; ip_it != image_path.end(); ++ip_it) { |
| 334 | if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) { |
| 335 | cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n"; |
| 336 | ok = false; |
| 337 | } |
| 338 | } |
| 339 | if (!ok) |
| 340 | throw op_runtime_error("invalid --image-path= options"); |
| 341 | |
| 342 | spec.extra_found_images.populate(image_path, spec.get_archive_path(), |
| 343 | root_path); |
| 344 | vector<string>::const_iterator im = temp_image_or_lib.begin(); |
| 345 | vector<string>::const_iterator last = temp_image_or_lib.end(); |
| 346 | for (; im != last; ++im) |
| 347 | spec.set_image_or_lib_name(*im); |
| 348 | |
| 349 | return spec; |
| 350 | } |
| 351 | |
| 352 | namespace { |
| 353 | |
/// Compute the sessions to use: @p session (or just "current" when it is
/// empty) minus the names listed in @p session_exclude. Each excluded
/// name removes at most one entry, the first one equal to it.
vector<string> filter_session(vector<string> const & session,
                              vector<string> const & session_exclude)
{
	vector<string> kept(session.begin(), session.end());

	if (kept.empty())
		kept.push_back("current");

	vector<string>::const_iterator excluded = session_exclude.begin();
	for (; excluded != session_exclude.end(); ++excluded) {
		// FIXME: would we use fnmatch on each item, are we allowed
		// to --session=current* ?
		vector<string>::iterator const hit =
			find(kept.begin(), kept.end(), *excluded);

		if (hit != kept.end())
			kept.erase(hit);
	}

	return kept;
}
| 374 | |
| 375 | static bool invalid_sample_file; |
| 376 | bool valid_candidate(string const & base_dir, string const & filename, |
| 377 | profile_spec const & spec, bool exclude_dependent, |
| 378 | bool exclude_cg) |
| 379 | { |
| 380 | if (exclude_cg && filename.find("{cg}") != string::npos) |
| 381 | return false; |
| 382 | |
| 383 | // strip out non sample files |
| 384 | string const & sub = filename.substr(base_dir.size(), string::npos); |
| 385 | if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/")) |
| 386 | return false; |
| 387 | |
| 388 | /* When overflows occur in the oprofile kernel driver's sample |
| 389 | * buffers (caused by too high of a sampling rate), it's possible |
| 390 | * for samples to be mis-attributed. A common scenario is that, |
| 391 | * while profiling process 'abc' running binary 'xzy', the task |
| 392 | * switch for 'abc' gets dropped somehow. Then, samples are taken |
| 393 | * for the 'xyz' binary. In the attempt to attribute the samples to |
| 394 | * the associated binary, the oprofile kernel code examines the |
| 395 | * the memory mappings for the last process for which it recorded |
| 396 | * a task switch. When profiling at a very high rate, the oprofile |
| 397 | * daemon is often the process that is mistakenly examined. Then the |
| 398 | * sample from binary 'xyz' is matched to some file that's open in |
| 399 | * oprofiled's memory space. Because oprofiled has many sample files |
| 400 | * open at any given time, there's a good chance the sample's VMA is |
| 401 | * contained within one of those sample files. So, once finding this |
| 402 | * bogus match, the oprofile kernel records a cookie switch for the |
| 403 | * sample file. This scenario is made even more likely if a high |
| 404 | * sampling rate (e.g., profiling on several events) is paired with |
| 405 | * callgraph data collection. |
| 406 | * |
| 407 | * When the daemon processes this sample data from the kernel, it |
| 408 | * creates a sample file for the sample file, resulting in something |
| 409 | * of the form: |
| 410 | * <session-dir>/[blah]<session-dir>/[blah] |
| 411 | * |
| 412 | * When the sample data is post-processed, the sample file is parsed to |
| 413 | * try to determine the name of the binary, but it gets horribly confused. |
| 414 | * At best, the post-processing tool will spit out some warning messages, |
| 415 | * such as: |
| 416 | * warning: |
| 417 | * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all |
| 418 | * could not be found. |
| 419 | * |
| 420 | * At worst, the parsing may result in an "invalid argument" runtime error |
| 421 | * because of the inability to parse a sample file whose name contains that |
| 422 | * of another sample file. This typically seems to happen when callgraph |
| 423 | * data is being collected. |
| 424 | * |
| 425 | * The next several lines of code checks if the passed filename |
| 426 | * contains <session-dir>/samples; if so, we discard it as an |
| 427 | * invalid sample file. |
| 428 | */ |
| 429 | |
| 430 | unsigned int j = base_dir.rfind('/'); |
| 431 | string session_samples_dir = base_dir.substr(0, j); |
| 432 | if (sub.find(session_samples_dir) != string::npos) { |
| 433 | invalid_sample_file = true; |
| 434 | return false; |
| 435 | } |
| 436 | |
| 437 | // strip out generated JIT object files for samples of anonymous regions |
| 438 | if (is_jit_sample(sub)) |
| 439 | return false; |
| 440 | |
| 441 | filename_spec file_spec(filename, spec.extra_found_images); |
| 442 | if (spec.match(file_spec)) { |
| 443 | if (exclude_dependent && file_spec.is_dependent()) |
| 444 | return false; |
| 445 | return true; |
| 446 | } |
| 447 | |
| 448 | return false; |
| 449 | } |
| 450 | |
| 451 | |
| 452 | /** |
| 453 | * Print a warning message if we detect any sample buffer overflows |
| 454 | * occurred in the kernel driver. |
| 455 | */ |
| 456 | void warn_if_kern_buffs_overflow(string const & session_samples_dir) |
| 457 | { |
| 458 | DIR * dir; |
| 459 | struct dirent * dirent; |
| 460 | string stats_path; |
| 461 | int ret = 0; |
| 462 | |
| 463 | stats_path = session_samples_dir + "stats/"; |
| 464 | ret = op_read_int_from_file((stats_path + "event_lost_overflow"). |
| 465 | c_str(), 0); |
| 466 | |
| 467 | if (!(dir = opendir(stats_path.c_str()))) { |
| 468 | ret = -1; |
| 469 | goto done; |
| 470 | } |
| 471 | |
| 472 | while ((dirent = readdir(dir)) && !ret) { |
| 473 | int cpu_nr; |
| 474 | string path; |
| 475 | if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1) |
| 476 | continue; |
| 477 | path = stats_path + dirent->d_name + "/"; |
| 478 | ret = op_read_int_from_file((path + "sample_lost_overflow"). |
| 479 | c_str(), 0); |
| 480 | } |
| 481 | closedir(dir); |
| 482 | |
| 483 | done: |
Jeff Brown | 7a33c86 | 2011-02-02 14:00:44 -0800 | [diff] [blame] | 484 | if (ret > 0) { |
Mike Dodd | 8cfa702 | 2010-11-17 11:12:26 -0800 | [diff] [blame] | 485 | cerr << "WARNING! The OProfile kernel driver reports sample " |
| 486 | << "buffer overflows." << endl; |
| 487 | cerr << "Such overflows can result in incorrect sample attribution" |
| 488 | << ", invalid sample" << endl |
| 489 | << "files and other symptoms. " |
| 490 | << "See the oprofiled.log for details." << endl; |
| 491 | cerr << "You should adjust your sampling frequency to eliminate" |
| 492 | << " (or at least minimize)" << endl |
| 493 | << "these overflows." << endl; |
| 494 | } |
| 495 | } |
| 496 | |
| 497 | |
| 498 | } // anonymous namespace |
| 499 | |
| 500 | |
| 501 | list<string> profile_spec::generate_file_list(bool exclude_dependent, |
| 502 | bool exclude_cg) const |
| 503 | { |
| 504 | // FIXME: isn't remove_duplicates faster than doing this, then copy() ? |
| 505 | set<string> unique_files; |
| 506 | |
| 507 | vector<string> sessions = filter_session(session, session_exclude); |
| 508 | |
| 509 | if (sessions.empty()) { |
| 510 | ostringstream os; |
| 511 | os << "No session given\n" |
| 512 | << "included session was:\n"; |
| 513 | copy(session.begin(), session.end(), |
| 514 | ostream_iterator<string>(os, "\n")); |
| 515 | os << "excluded session was:\n"; |
| 516 | copy(session_exclude.begin(), session_exclude.end(), |
| 517 | ostream_iterator<string>(os, "\n")); |
| 518 | throw invalid_argument(os.str()); |
| 519 | } |
| 520 | |
| 521 | bool found_file = false; |
| 522 | |
| 523 | vector<string>::const_iterator cit = sessions.begin(); |
| 524 | vector<string>::const_iterator end = sessions.end(); |
| 525 | |
| 526 | for (; cit != end; ++cit) { |
| 527 | if (cit->empty()) |
| 528 | continue; |
| 529 | |
| 530 | string base_dir; |
| 531 | invalid_sample_file = false; |
| 532 | if ((*cit)[0] != '.' && (*cit)[0] != '/') |
| 533 | base_dir = archive_path + op_samples_dir; |
| 534 | base_dir += *cit; |
| 535 | |
| 536 | base_dir = op_realpath(base_dir); |
| 537 | |
| 538 | list<string> files; |
| 539 | create_file_list(files, base_dir, "*", true); |
| 540 | |
| 541 | if (!files.empty()) { |
| 542 | found_file = true; |
| 543 | warn_if_kern_buffs_overflow(base_dir + "/"); |
| 544 | } |
| 545 | |
| 546 | list<string>::const_iterator it = files.begin(); |
| 547 | list<string>::const_iterator fend = files.end(); |
| 548 | for (; it != fend; ++it) { |
| 549 | if (valid_candidate(base_dir, *it, *this, |
| 550 | exclude_dependent, exclude_cg)) { |
| 551 | unique_files.insert(*it); |
| 552 | } |
| 553 | } |
| 554 | if (invalid_sample_file) { |
| 555 | cerr << "Warning: Invalid sample files found in " |
| 556 | << base_dir << endl; |
| 557 | cerr << "This problem can be caused by too high of a sampling rate." |
| 558 | << endl; |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | if (!found_file) { |
| 563 | ostringstream os; |
| 564 | os << "No sample file found: try running opcontrol --dump\n" |
| 565 | << "or specify a session containing sample files\n"; |
| 566 | throw op_fatal_error(os.str()); |
| 567 | } |
| 568 | |
| 569 | list<string> result; |
| 570 | copy(unique_files.begin(), unique_files.end(), back_inserter(result)); |
| 571 | |
| 572 | return result; |
| 573 | } |