Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 1 | Demonstrations of offcputime, the Linux eBPF/bcc version. |
| 2 | |
| 3 | |
| 4 | This program shows stack traces that were blocked, and the total duration they |
| 5 | were blocked. It works by tracing when threads block and when they return to |
| 6 | CPU, measuring both the time they were blocked (aka the "off-CPU time") and the |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 7 | blocked stack trace and the task name. This data is summarized in kernel by |
| 8 | summing the blocked time by unique stack trace and task name. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 9 | |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 10 | Here is some example output. The -K option was used to only match kernel stacks. |
| 11 | To explain what we are seeing: the very first stack trace looks like a page |
| 12 | fault (do_page_fault() etc) from the "chmod" command, and in total was off-CPU |
| 13 | for 13 microseconds. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 14 | |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 15 | # ./offcputime -K |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 16 | Tracing off-CPU time (us) of all threads by kernel stack... Hit Ctrl-C to end. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 17 | ^C |
| 18 | schedule |
| 19 | schedule_timeout |
| 20 | io_schedule_timeout |
| 21 | bit_wait_io |
| 22 | __wait_on_bit |
| 23 | wait_on_page_bit_killable |
| 24 | __lock_page_or_retry |
| 25 | filemap_fault |
| 26 | __do_fault |
| 27 | handle_mm_fault |
| 28 | __do_page_fault |
| 29 | do_page_fault |
| 30 | page_fault |
| 31 | chmod |
| 32 | 13 |
| 33 | |
| 34 | schedule |
| 35 | rcu_nocb_kthread |
| 36 | kthread |
| 37 | ret_from_fork |
| 38 | ddebug_tables |
| 39 | rcuos/0 |
| 40 | 22 |
| 41 | |
| 42 | schedule |
| 43 | schedule_timeout |
| 44 | io_schedule_timeout |
| 45 | bit_wait_io |
| 46 | __wait_on_bit_lock |
| 47 | __lock_page |
| 48 | lock_page |
| 49 | __do_fault |
| 50 | handle_mm_fault |
| 51 | __do_page_fault |
| 52 | do_page_fault |
| 53 | page_fault |
| 54 | run |
| 55 | 27 |
| 56 | |
| 57 | schedule |
| 58 | schedule_timeout |
| 59 | io_schedule_timeout |
| 60 | bit_wait_io |
| 61 | __wait_on_bit |
| 62 | wait_on_page_bit_killable |
| 63 | __lock_page_or_retry |
| 64 | filemap_fault |
| 65 | __do_fault |
| 66 | handle_mm_fault |
| 67 | __do_page_fault |
| 68 | do_page_fault |
| 69 | page_fault |
| 70 | clear_user |
| 71 | padzero |
| 72 | load_elf_binary |
| 73 | search_binary_handler |
| 74 | load_script |
| 75 | search_binary_handler |
| 76 | do_execveat_common.isra.27 |
| 77 | run |
| 78 | 28 |
| 79 | |
| 80 | schedule |
| 81 | schedule_timeout |
| 82 | io_schedule_timeout |
| 83 | bit_wait_io |
| 84 | __wait_on_bit |
| 85 | wait_on_page_bit_killable |
| 86 | __lock_page_or_retry |
| 87 | filemap_fault |
| 88 | __do_fault |
| 89 | handle_mm_fault |
| 90 | __do_page_fault |
| 91 | do_page_fault |
| 92 | page_fault |
| 93 | run |
| 94 | 82 |
| 95 | |
| 96 | schedule |
| 97 | pipe_wait |
| 98 | pipe_read |
| 99 | __vfs_read |
| 100 | vfs_read |
| 101 | sys_read |
| 102 | entry_SYSCALL_64_fastpath |
| 103 | bash |
| 104 | 94 |
| 105 | |
| 106 | schedule |
| 107 | rcu_gp_kthread |
| 108 | kthread |
| 109 | ret_from_fork |
| 110 | ddebug_tables |
| 111 | rcu_sched |
| 112 | 104 |
| 113 | |
| 114 | schedule |
| 115 | schedule_timeout |
| 116 | io_schedule_timeout |
| 117 | bit_wait_io |
| 118 | __wait_on_bit |
| 119 | out_of_line_wait_on_bit |
| 120 | __wait_on_buffer |
| 121 | jbd2_journal_commit_transaction |
| 122 | kjournald2 |
| 123 | kthread |
| 124 | ret_from_fork |
| 125 | mb_cache_list |
| 126 | jbd2/xvda1-8 |
| 127 | 986 |
| 128 | |
| 129 | schedule |
| 130 | schedule_timeout |
| 131 | io_schedule_timeout |
| 132 | bit_wait_io |
| 133 | __wait_on_bit |
| 134 | out_of_line_wait_on_bit |
| 135 | __wait_on_buffer |
| 136 | jbd2_journal_commit_transaction |
| 137 | kjournald2 |
| 138 | kthread |
| 139 | ret_from_fork |
| 140 | mb_cache_list |
| 141 | jbd2/xvda1-8 |
| 142 | 6630 |
| 143 | |
| 144 | schedule |
| 145 | schedule_timeout |
| 146 | io_schedule_timeout |
| 147 | bit_wait_io |
| 148 | __wait_on_bit |
| 149 | out_of_line_wait_on_bit |
| 150 | do_get_write_access |
| 151 | jbd2_journal_get_write_access |
| 152 | __ext4_journal_get_write_access |
| 153 | ext4_mb_mark_diskspace_used |
| 154 | ext4_mb_new_blocks |
| 155 | ext4_ext_map_blocks |
| 156 | ext4_map_blocks |
| 157 | ext4_writepages |
| 158 | do_writepages |
| 159 | __filemap_fdatawrite_range |
| 160 | filemap_flush |
| 161 | ext4_alloc_da_blocks |
| 162 | ext4_rename |
| 163 | ext4_rename2 |
| 164 | supervise |
| 165 | 6645 |
| 166 | |
| 167 | schedule |
| 168 | schedule_timeout |
| 169 | io_schedule_timeout |
| 170 | bit_wait_io |
| 171 | __wait_on_bit |
| 172 | out_of_line_wait_on_bit |
| 173 | do_get_write_access |
| 174 | jbd2_journal_get_write_access |
| 175 | __ext4_journal_get_write_access |
| 176 | __ext4_new_inode |
| 177 | ext4_create |
| 178 | vfs_create |
| 179 | path_openat |
| 180 | do_filp_open |
| 181 | do_sys_open |
| 182 | sys_open |
| 183 | entry_SYSCALL_64_fastpath |
| 184 | supervise |
| 185 | 12702 |
| 186 | |
| 187 | schedule |
| 188 | rcu_nocb_kthread |
| 189 | kthread |
| 190 | ret_from_fork |
| 191 | rcuos/2 |
| 192 | 16036 |
| 193 | |
| 194 | schedule |
| 195 | rcu_nocb_kthread |
| 196 | kthread |
| 197 | ret_from_fork |
| 198 | rcuos/4 |
| 199 | 24085 |
| 200 | |
| 201 | schedule |
| 202 | do_wait |
| 203 | sys_wait4 |
| 204 | entry_SYSCALL_64_fastpath |
| 205 | run |
| 206 | 233055 |
| 207 | |
| 208 | schedule |
| 209 | schedule_timeout |
| 210 | io_schedule_timeout |
| 211 | bit_wait_io |
| 212 | __wait_on_bit |
| 213 | wait_on_page_bit |
| 214 | truncate_inode_pages_range |
| 215 | truncate_inode_pages_final |
| 216 | ext4_evict_inode |
| 217 | evict |
| 218 | iput |
| 219 | __dentry_kill |
| 220 | dput |
| 221 | sys_rename |
| 222 | entry_SYSCALL_64_fastpath |
| 223 | supervise |
| 224 | 297113 |
| 225 | |
| 226 | schedule |
| 227 | schedule_timeout |
| 228 | wait_woken |
| 229 | n_tty_read |
| 230 | tty_read |
| 231 | __vfs_read |
| 232 | vfs_read |
| 233 | sys_read |
| 234 | entry_SYSCALL_64_fastpath |
| 235 | bash |
| 236 | 1789866 |
| 237 | |
| 238 | schedule |
| 239 | schedule_timeout |
| 240 | io_schedule_timeout |
| 241 | do_blockdev_direct_IO |
| 242 | __blockdev_direct_IO |
| 243 | blkdev_direct_IO |
| 244 | generic_file_read_iter |
| 245 | blkdev_read_iter |
| 246 | __vfs_read |
| 247 | vfs_read |
| 248 | sys_read |
| 249 | entry_SYSCALL_64_fastpath |
| 250 | dd |
| 251 | 3310763 |
| 252 | |
| 253 | schedule |
| 254 | smpboot_thread_fn |
| 255 | kthread |
| 256 | ret_from_fork |
| 257 | watchdog/1 |
| 258 | 3999989 |
| 259 | |
| 260 | schedule |
| 261 | smpboot_thread_fn |
| 262 | kthread |
| 263 | ret_from_fork |
| 264 | watchdog/5 |
| 265 | 3999995 |
| 266 | |
| 267 | schedule |
| 268 | smpboot_thread_fn |
| 269 | kthread |
| 270 | ret_from_fork |
| 271 | watchdog/4 |
| 272 | 3999996 |
| 273 | |
| 274 | schedule |
| 275 | smpboot_thread_fn |
| 276 | kthread |
| 277 | ret_from_fork |
| 278 | watchdog/0 |
| 279 | 3999996 |
| 280 | |
| 281 | schedule |
| 282 | smpboot_thread_fn |
| 283 | kthread |
| 284 | ret_from_fork |
| 285 | watchdog/3 |
| 286 | 3999998 |
| 287 | |
| 288 | schedule |
| 289 | smpboot_thread_fn |
| 290 | kthread |
| 291 | ret_from_fork |
| 292 | watchdog/7 |
| 293 | 3999999 |
| 294 | |
| 295 | schedule |
| 296 | smpboot_thread_fn |
| 297 | kthread |
| 298 | ret_from_fork |
| 299 | watchdog/2 |
| 300 | 4000001 |
| 301 | |
| 302 | schedule |
| 303 | smpboot_thread_fn |
| 304 | kthread |
| 305 | ret_from_fork |
| 306 | watchdog/6 |
| 307 | 4000001 |
| 308 | |
| 309 | schedule |
| 310 | do_wait |
| 311 | sys_wait4 |
| 312 | entry_SYSCALL_64_fastpath |
| 313 | bash |
| 314 | 4039675 |
| 315 | |
| 316 | schedule |
| 317 | do_nanosleep |
| 318 | hrtimer_nanosleep |
| 319 | sys_nanosleep |
| 320 | entry_SYSCALL_64_fastpath |
| 321 | svscan |
| 322 | 5000112 |
| 323 | |
| 324 | schedule |
| 325 | schedule_hrtimeout_range_clock |
| 326 | schedule_hrtimeout_range |
| 327 | poll_schedule_timeout |
| 328 | do_select |
| 329 | core_sys_select |
| 330 | sys_select |
| 331 | entry_SYSCALL_64_fastpath |
| 332 | snmpd |
| 333 | 5998761 |
| 334 | |
| 335 | schedule |
| 336 | smpboot_thread_fn |
| 337 | kthread |
| 338 | ret_from_fork |
| 339 | migration/3 |
| 340 | 6149779 |
| 341 | |
| 342 | schedule |
| 343 | schedule_hrtimeout_range_clock |
| 344 | schedule_hrtimeout_range |
| 345 | poll_schedule_timeout |
| 346 | do_select |
| 347 | core_sys_select |
| 348 | sys_select |
| 349 | entry_SYSCALL_64_fastpath |
| 350 | ntpd |
| 351 | 6999832 |
| 352 | |
| 353 | schedule |
| 354 | worker_thread |
| 355 | kthread |
| 356 | ret_from_fork |
| 357 | kworker/u16:2 |
| 358 | 7131941 |
| 359 | |
| 360 | schedule |
| 361 | worker_thread |
| 362 | kthread |
| 363 | ret_from_fork |
| 364 | kworker/3:0 |
| 365 | 7999844 |
| 366 | |
| 367 | schedule |
| 368 | worker_thread |
| 369 | kthread |
| 370 | ret_from_fork |
| 371 | kworker/1:1 |
| 372 | 7999872 |
| 373 | |
| 374 | schedule |
| 375 | worker_thread |
| 376 | kthread |
| 377 | ret_from_fork |
| 378 | kworker/2:1 |
| 379 | 7999889 |
| 380 | |
| 381 | schedule |
| 382 | worker_thread |
| 383 | kthread |
| 384 | ret_from_fork |
| 385 | kworker/5:1 |
| 386 | 7999936 |
| 387 | |
| 388 | schedule |
| 389 | worker_thread |
| 390 | kthread |
| 391 | ret_from_fork |
| 392 | kworker/7:1 |
| 393 | 7999938 |
| 394 | |
| 395 | schedule |
| 396 | worker_thread |
| 397 | kthread |
| 398 | ret_from_fork |
| 399 | kworker/6:1 |
| 400 | 7999940 |
| 401 | |
| 402 | schedule |
| 403 | do_nanosleep |
| 404 | hrtimer_nanosleep |
| 405 | sys_nanosleep |
| 406 | entry_SYSCALL_64_fastpath |
| 407 | tail |
| 408 | 8000905 |
| 409 | |
| 410 | schedule |
| 411 | smpboot_thread_fn |
| 412 | kthread |
| 413 | ret_from_fork |
| 414 | migration/7 |
| 415 | 8197046 |
| 416 | |
| 417 | schedule |
| 418 | pipe_wait |
| 419 | pipe_read |
| 420 | __vfs_read |
| 421 | vfs_read |
| 422 | sys_read |
| 423 | entry_SYSCALL_64_fastpath |
| 424 | readproctitle |
| 425 | 8197835 |
| 426 | |
| 427 | schedule |
| 428 | smpboot_thread_fn |
| 429 | kthread |
| 430 | ret_from_fork |
| 431 | migration/4 |
| 432 | 8201851 |
| 433 | |
| 434 | schedule |
| 435 | smpboot_thread_fn |
| 436 | kthread |
| 437 | ret_from_fork |
| 438 | migration/2 |
| 439 | 8203375 |
| 440 | |
| 441 | schedule |
| 442 | smpboot_thread_fn |
| 443 | kthread |
| 444 | ret_from_fork |
| 445 | migration/6 |
| 446 | 8208664 |
| 447 | |
| 448 | schedule |
| 449 | smpboot_thread_fn |
| 450 | kthread |
| 451 | ret_from_fork |
| 452 | migration/5 |
| 453 | 8209819 |
| 454 | |
| 455 | schedule |
| 456 | smpboot_thread_fn |
| 457 | kthread |
| 458 | ret_from_fork |
| 459 | ddebug_tables |
| 460 | migration/0 |
| 461 | 8211292 |
| 462 | |
| 463 | schedule |
| 464 | smpboot_thread_fn |
| 465 | kthread |
| 466 | ret_from_fork |
| 467 | migration/1 |
| 468 | 8212100 |
| 469 | |
| 470 | schedule |
| 471 | worker_thread |
| 472 | kthread |
| 473 | ret_from_fork |
| 474 | kworker/0:2 |
| 475 | 8270305 |
| 476 | |
| 477 | schedule |
| 478 | rcu_nocb_kthread |
| 479 | kthread |
| 480 | ret_from_fork |
| 481 | rcuos/3 |
| 482 | 8349697 |
| 483 | |
| 484 | schedule |
| 485 | rcu_nocb_kthread |
| 486 | kthread |
| 487 | ret_from_fork |
| 488 | rcuos/2 |
| 489 | 8363357 |
| 490 | |
| 491 | schedule |
| 492 | rcu_nocb_kthread |
| 493 | kthread |
| 494 | ret_from_fork |
| 495 | rcuos/1 |
| 496 | 8365338 |
| 497 | |
| 498 | schedule |
| 499 | schedule_timeout |
| 500 | xfs_buf_terminate |
| 501 | kthread |
| 502 | ret_from_fork |
| 503 | xfsaild/md0 |
| 504 | 8371514 |
| 505 | |
| 506 | schedule |
| 507 | rcu_nocb_kthread |
| 508 | kthread |
| 509 | ret_from_fork |
| 510 | rcuos/4 |
| 511 | 8384013 |
| 512 | |
| 513 | schedule |
| 514 | rcu_nocb_kthread |
| 515 | kthread |
| 516 | ret_from_fork |
| 517 | rcuos/5 |
| 518 | 8390016 |
| 519 | |
| 520 | schedule |
| 521 | rcu_nocb_kthread |
| 522 | kthread |
| 523 | ret_from_fork |
| 524 | ddebug_tables |
| 525 | rcuos/0 |
| 526 | 8405428 |
| 527 | |
| 528 | schedule |
| 529 | schedule_timeout |
| 530 | rcu_gp_kthread |
| 531 | kthread |
| 532 | ret_from_fork |
| 533 | ddebug_tables |
| 534 | rcu_sched |
| 535 | 8406930 |
| 536 | |
| 537 | schedule |
| 538 | rcu_nocb_kthread |
| 539 | kthread |
| 540 | ret_from_fork |
| 541 | rcuos/7 |
| 542 | 8409575 |
| 543 | |
| 544 | schedule |
| 545 | rcu_nocb_kthread |
| 546 | kthread |
| 547 | ret_from_fork |
| 548 | rcuos/6 |
| 549 | 8415062 |
| 550 | |
| 551 | schedule |
| 552 | schedule_hrtimeout_range_clock |
| 553 | schedule_hrtimeout_range |
| 554 | poll_schedule_timeout |
| 555 | do_select |
| 556 | core_sys_select |
| 557 | sys_select |
| 558 | entry_SYSCALL_64_fastpath |
| 559 | offcputime |
| 560 | 8421478 |
| 561 | |
| 562 | schedule |
| 563 | worker_thread |
| 564 | kthread |
| 565 | ret_from_fork |
| 566 | kworker/4:0 |
| 567 | 8421492 |
| 568 | |
| 569 | schedule |
| 570 | schedule_hrtimeout_range_clock |
| 571 | schedule_hrtimeout_range |
| 572 | poll_schedule_timeout |
| 573 | do_select |
| 574 | core_sys_select |
| 575 | sys_select |
| 576 | entry_SYSCALL_64_fastpath |
| 577 | sshd |
| 578 | 14249005 |
| 579 | |
| 580 | schedule |
| 581 | schedule_hrtimeout_range_clock |
| 582 | schedule_hrtimeout_range |
| 583 | poll_schedule_timeout |
| 584 | do_sys_poll |
| 585 | sys_poll |
| 586 | entry_SYSCALL_64_fastpath |
| 587 | supervise |
| 588 | 81670888 |
| 589 | |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 590 | The last few stack traces aren't very interesting, since they are threads that |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 591 | are often blocked off-CPU waiting for work. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 592 | |
| 593 | Do be somewhat careful with overhead: this is tracing scheduler functions, which |
| 594 | can be called very frequently. While this uses in-kernel summaries for |
| 595 | efficiency, the rate of scheduler functions can be very high (> 1,000,000/sec), |
| 596 | and this is performing stack walks when threads return to CPU. At some point |
| 597 | the overhead will be measurable. |
| 598 | |
| 599 | |
| 600 | A -p option can be used to filter (in-kernel) on a single process ID. For |
| 601 | example, only matching PID 26651, which is a running "dd" command: |
| 602 | |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 603 | # ./offcputime -K -p 26651 |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 604 | Tracing off-CPU time (us) of all threads by kernel stack... Hit Ctrl-C to end. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 605 | ^C |
| 606 | schedule |
| 607 | schedule_timeout |
| 608 | io_schedule_timeout |
| 609 | do_blockdev_direct_IO |
| 610 | __blockdev_direct_IO |
| 611 | blkdev_direct_IO |
| 612 | generic_file_read_iter |
| 613 | blkdev_read_iter |
| 614 | __vfs_read |
| 615 | vfs_read |
| 616 | sys_read |
| 617 | entry_SYSCALL_64_fastpath |
| 618 | dd |
| 619 | 2405710 |
| 620 | |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 621 | The stack trace shows "dd" is blocked waiting on disk I/O, as expected, for a |
| 622 | total of 2.4 seconds during tracing. |
| 623 | |
| 624 | |
| 625 | A duration can be added, for example, tracing for 5 seconds only: |
| 626 | |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 627 | # ./offcputime -K -p 26651 5 |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 628 | Tracing off-CPU time (us) of all threads by kernel stack for 5 secs. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 629 | |
| 630 | schedule |
| 631 | schedule_timeout |
| 632 | io_schedule_timeout |
| 633 | do_blockdev_direct_IO |
| 634 | __blockdev_direct_IO |
| 635 | blkdev_direct_IO |
| 636 | generic_file_read_iter |
| 637 | blkdev_read_iter |
| 638 | __vfs_read |
| 639 | vfs_read |
| 640 | sys_read |
| 641 | entry_SYSCALL_64_fastpath |
| 642 | dd |
| 643 | 4413909 |
| 644 | |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 645 | Here, dd was blocked for 4.4 seconds out of 5. Or put differently, likely |
| 646 | on-CPU for about 12% of the time. Which matches the ratio seen by time(1): |
| 647 | |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 648 | # time dd if=/dev/md0 iflag=direct of=/dev/null bs=1k |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 649 | ^C108115+0 records in |
| 650 | 108114+0 records out |
| 651 | 110708736 bytes (111 MB) copied, 13.7565 s, 8.0 MB/s |
| 652 | |
| 653 | real 0m13.760s |
| 654 | user 0m0.000s |
| 655 | sys 0m1.739s |
| 656 | |
| 657 | |
| 658 | A -f option will emit output using the "folded stacks" format, which can be |
| 659 | read directly by flamegraph.pl from the FlameGraph open source software |
| 660 | (https://github.com/brendangregg/FlameGraph). Eg: |
| 661 | |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 662 | # ./offcputime -K -f 5 |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 663 | bash;entry_SYSCALL_64_fastpath;sys_read;vfs_read;__vfs_read;tty_read;n_tty_read;call_rwsem_down_read_failed;rwsem_down_read_failed;schedule 8 |
| 664 | yes;entry_SYSCALL_64_fastpath;sys_write;vfs_write;__vfs_write;tty_write;n_tty_write;call_rwsem_down_read_failed;rwsem_down_read_failed;schedule 14 |
| 665 | run;page_fault;do_page_fault;__do_page_fault;handle_mm_fault;__do_fault;filemap_fault;__lock_page_or_retry;wait_on_page_bit_killable;__wait_on_bit;bit_wait_io;io_schedule_timeout;schedule_timeout;schedule 33 |
| 666 | rcuos/4;ret_from_fork;kthread;rcu_nocb_kthread;schedule 45 |
| 667 | bash;entry_SYSCALL_64_fastpath;sys_read;vfs_read;__vfs_read;pipe_read;pipe_wait;schedule 88 |
| 668 | run;page_fault;do_page_fault;__do_page_fault;handle_mm_fault;__do_fault;filemap_fault;__lock_page_or_retry;wait_on_page_bit_killable;__wait_on_bit;bit_wait_io;io_schedule_timeout;schedule_timeout;schedule 108 |
| 669 | jbd2/xvda1-8;mb_cache_list;ret_from_fork;kthread;kjournald2;jbd2_journal_commit_transaction;__wait_on_buffer;out_of_line_wait_on_bit;__wait_on_bit;bit_wait_io;io_schedule_timeout;schedule_timeout;schedule 828 |
| 670 | jbd2/xvda1-8;mb_cache_list;ret_from_fork;kthread;kjournald2;jbd2_journal_commit_transaction;__wait_on_buffer;out_of_line_wait_on_bit;__wait_on_bit;bit_wait_io;io_schedule_timeout;schedule_timeout;schedule 6201 |
| 671 | supervise;entry_SYSCALL_64_fastpath;sys_rename;dput;__dentry_kill;iput;evict;ext4_evict_inode;truncate_inode_pages_final;truncate_inode_pages_range;wait_on_page_bit;__wait_on_bit;bit_wait_io;io_schedule_timeout;schedule_timeout;schedule 41049 |
| 672 | run;entry_SYSCALL_64_fastpath;sys_wait4;do_wait;schedule 120709 |
| 673 | bash;entry_SYSCALL_64_fastpath;sys_wait4;do_wait;schedule 699320 |
| 674 | ksoftirqd/0;ret_from_fork;kthread;smpboot_thread_fn;schedule 1077529 |
| 675 | bash;entry_SYSCALL_64_fastpath;sys_read;vfs_read;__vfs_read;tty_read;n_tty_read;wait_woken;schedule_timeout;schedule 1362045 |
| 676 | sshd;entry_SYSCALL_64_fastpath;sys_select;core_sys_select;do_select;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule 1377627 |
| 677 | migration/0;ddebug_tables;ret_from_fork;kthread;smpboot_thread_fn;schedule 2040753 |
| 678 | snmpd;entry_SYSCALL_64_fastpath;sys_select;core_sys_select;do_select;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule 2197568 |
| 679 | migration/5;ret_from_fork;kthread;smpboot_thread_fn;schedule 3079426 |
| 680 | migration/7;ret_from_fork;kthread;smpboot_thread_fn;schedule 3084746 |
| 681 | kworker/6:2;ret_from_fork;kthread;worker_thread;schedule 3940583 |
| 682 | kworker/5:1;ret_from_fork;kthread;worker_thread;schedule 3944892 |
| 683 | kworker/1:2;ret_from_fork;kthread;worker_thread;schedule 3999646 |
| 684 | ntpd;entry_SYSCALL_64_fastpath;sys_select;core_sys_select;do_select;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule 3999904 |
| 685 | kworker/u16:0;ret_from_fork;kthread;worker_thread;schedule 3999967 |
| 686 | kworker/7:0;ret_from_fork;kthread;worker_thread;schedule 3999987 |
| 687 | tail;entry_SYSCALL_64_fastpath;sys_nanosleep;hrtimer_nanosleep;do_nanosleep;schedule 4000473 |
| 688 | migration/1;ret_from_fork;kthread;smpboot_thread_fn;schedule 4091150 |
| 689 | migration/4;ret_from_fork;kthread;smpboot_thread_fn;schedule 4095217 |
| 690 | readproctitle;entry_SYSCALL_64_fastpath;sys_read;vfs_read;__vfs_read;pipe_read;pipe_wait;schedule 4108470 |
| 691 | migration/3;ret_from_fork;kthread;smpboot_thread_fn;schedule 4109264 |
| 692 | migration/2;ret_from_fork;kthread;smpboot_thread_fn;schedule 4109280 |
| 693 | migration/6;ret_from_fork;kthread;smpboot_thread_fn;schedule 4111143 |
| 694 | kworker/4:0;ret_from_fork;kthread;worker_thread;schedule 4402350 |
| 695 | kworker/3:0;ret_from_fork;kthread;worker_thread;schedule 4433988 |
| 696 | kworker/2:1;ret_from_fork;kthread;worker_thread;schedule 4636142 |
| 697 | kworker/0:2;ret_from_fork;kthread;worker_thread;schedule 4832023 |
| 698 | rcuos/1;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4974186 |
| 699 | rcuos/5;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4977137 |
| 700 | rcuos/6;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4987769 |
| 701 | rcuos/3;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4992282 |
| 702 | rcuos/4;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4992364 |
| 703 | rcuos/2;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4992714 |
| 704 | rcuos/0;ddebug_tables;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4996504 |
| 705 | rcuos/7;ret_from_fork;kthread;rcu_nocb_kthread;schedule 4998497 |
| 706 | rcu_sched;ddebug_tables;ret_from_fork;kthread;rcu_gp_kthread;schedule_timeout;schedule 5000686 |
| 707 | offcputime;entry_SYSCALL_64_fastpath;sys_select;core_sys_select;do_select;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule 5005063 |
| 708 | dd;entry_SYSCALL_64_fastpath;sys_read;vfs_read;__vfs_read;blkdev_read_iter;generic_file_read_iter;blkdev_direct_IO;__blockdev_direct_IO;do_blockdev_direct_IO;io_schedule_timeout;schedule_timeout;schedule 8025599 |
| 709 | supervise;entry_SYSCALL_64_fastpath;sys_poll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule 40835611 |
| 710 | |
| 711 | The stack traces are shown as single lines, with functions separated by |
| 712 | semicolons. The first entry is the task name. The 2nd column is the total |
| 713 | off-CPU time. |
| 714 | |
| 715 | I'd save this output to a file, then move it to the system where you'll be |
| 716 | creating your "off-CPU time flame graphs". |
| 717 | |
| 718 | |
| 719 | USAGE message: |
| 720 | |
Brendan Gregg | 843340d | 2017-07-31 18:39:30 -0700 | [diff] [blame] | 721 | # ./offcputime.py -h |
Mark Drayton | 66bf2e8 | 2016-07-31 22:47:07 +0100 | [diff] [blame] | 722 | usage: offcputime.py [-h] [-p PID | -t TID | -u | -k] [-U | -K] [-d] [-f] |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 723 | [--stack-storage-size STACK_STORAGE_SIZE] |
Brendan Gregg | 843340d | 2017-07-31 18:39:30 -0700 | [diff] [blame] | 724 | [-m MIN_BLOCK_TIME] [-M MAX_BLOCK_TIME] [--state STATE] |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 725 | [duration] |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 726 | |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 727 | Summarize off-CPU time by stack trace |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 728 | |
| 729 | positional arguments: |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 730 | duration duration of trace, in seconds |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 731 | |
| 732 | optional arguments: |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 733 | -h, --help show this help message and exit |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 734 | -p PID, --pid PID trace this PID only |
Mark Drayton | 66bf2e8 | 2016-07-31 22:47:07 +0100 | [diff] [blame] | 735 | -t TID, --tid TID trace this TID only |
Andrew Birchall | 582b5dd | 2016-05-04 16:03:34 -0700 | [diff] [blame] | 736 | -u, --user-threads-only |
| 737 | user threads only (no kernel threads) |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 738 | -k, --kernel-threads-only |
| 739 | kernel threads only (no user threads) |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 740 | -U, --user-stacks-only |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 741 | show stacks from user space only (no kernel space |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 742 | stacks) |
| 743 | -K, --kernel-stacks-only |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 744 | show stacks from kernel space only (no user space |
Andrew Birchall | 1f202e7 | 2016-05-05 10:56:40 -0700 | [diff] [blame] | 745 | stacks) |
Mark Drayton | 66bf2e8 | 2016-07-31 22:47:07 +0100 | [diff] [blame] | 746 | -d, --delimited insert delimiter between kernel/user stacks |
Andrew Birchall | 47d871f | 2016-05-11 18:31:49 -0700 | [diff] [blame] | 747 | -f, --folded output folded format |
| 748 | --stack-storage-size STACK_STORAGE_SIZE |
| 749 | the number of unique stack traces that can be stored |
| 750 | and displayed (default 1024) |
Brendan Gregg | 843340d | 2017-07-31 18:39:30 -0700 | [diff] [blame] | 751 | -m MIN_BLOCK_TIME, --min-block-time MIN_BLOCK_TIME |
| 752 | the amount of time in microseconds over which we store |
| 753 | traces (default 1) |
| 754 | -M MAX_BLOCK_TIME, --max-block-time MAX_BLOCK_TIME |
| 755 | the amount of time in microseconds under which we |
| 756 | store traces (default U64_MAX) |
| 757 | --state STATE filter on this thread state bitmask (eg, 2 == |
| 758 | TASK_UNINTERRUPTIBLE) see include/linux/sched.h |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 759 | |
| 760 | examples: |
| 761 | ./offcputime # trace off-CPU stack time until Ctrl-C |
| 762 | ./offcputime 5 # trace for 5 seconds only |
| 763 | ./offcputime -f 5 # 5 seconds, and output in folded format |
Brendan Gregg | 843340d | 2017-07-31 18:39:30 -0700 | [diff] [blame] | 764 | ./offcputime -m 1000 # trace only events that last more than 1000 usec |
| 765 | ./offcputime -M 10000 # trace only events that last less than 10000 usec |
Andrew Birchall | 582b5dd | 2016-05-04 16:03:34 -0700 | [diff] [blame] | 766 | ./offcputime -p 185 # only trace threads for PID 185 |
Mark Drayton | 66bf2e8 | 2016-07-31 22:47:07 +0100 | [diff] [blame] | 767 | ./offcputime -t 188 # only trace thread 188 |
Andrew Birchall | 582b5dd | 2016-05-04 16:03:34 -0700 | [diff] [blame] | 768 | ./offcputime -u # only trace user threads (no kernel) |
| 769 | ./offcputime -k # only trace kernel threads (no user) |
Andrew Birchall | 7f0a6f8 | 2016-05-24 01:44:41 -0700 | [diff] [blame] | 770 | ./offcputime -U # only show user space stacks (no kernel) |
| 771 | ./offcputime -K # only show kernel space stacks (no user) |