Mel Gorman | c9d05cf | 2009-09-21 17:02:47 -0700 | [diff] [blame] | 1 | #!/usr/bin/perl |
| 2 | # This is a POC (proof of concept or piece of crap, take your pick) for reading the |
| 3 | # text representation of trace output related to page allocation. It makes an attempt |
| 4 | # to extract some high-level information on what is going on. The accuracy of the parser |
| 5 | # may vary considerably |
| 6 | # |
| 7 | # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe |
| 8 | # other options |
| 9 | # --prepend-parent Report on the parent proc and PID |
| 10 | # --read-procstat If the trace lacks process info, get it from /proc |
| 11 | # --ignore-pid Aggregate processes of the same name together |
| 12 | # |
| 13 | # Copyright (c) IBM Corporation 2009 |
| 14 | # Author: Mel Gorman <mel@csn.ul.ie> |
| 15 | use strict; |
| 16 | use Getopt::Long; |
| 17 | |
| 18 | # Tracepoint events |
| 19 | use constant MM_PAGE_ALLOC => 1; |
| 20 | use constant MM_PAGE_FREE_DIRECT => 2; |
| 21 | use constant MM_PAGEVEC_FREE => 3; |
| 22 | use constant MM_PAGE_PCPU_DRAIN => 4; |
| 23 | use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; |
| 24 | use constant MM_PAGE_ALLOC_EXTFRAG => 6; |
| 25 | use constant EVENT_UNKNOWN => 7; |
| 26 | |
| 27 | # Constants used to track state |
| 28 | use constant STATE_PCPU_PAGES_DRAINED => 8; |
| 29 | use constant STATE_PCPU_PAGES_REFILLED => 9; |
| 30 | |
| 31 | # High-level events extrapolated from tracepoints |
| 32 | use constant HIGH_PCPU_DRAINS => 10; |
| 33 | use constant HIGH_PCPU_REFILLS => 11; |
| 34 | use constant HIGH_EXT_FRAGMENT => 12; |
| 35 | use constant HIGH_EXT_FRAGMENT_SEVERE => 13; |
| 36 | use constant HIGH_EXT_FRAGMENT_MODERATE => 14; |
| 37 | use constant HIGH_EXT_FRAGMENT_CHANGED => 15; |
| 38 | |
| 39 | my %perprocesspid; |
| 40 | my %perprocess; |
| 41 | my $opt_ignorepid; |
| 42 | my $opt_read_procstat; |
| 43 | my $opt_prepend_parent; |
| 44 | |
| 45 | # Catch sigint and exit on request |
| 46 | my $sigint_report = 0; |
| 47 | my $sigint_exit = 0; |
| 48 | my $sigint_pending = 0; |
| 49 | my $sigint_received = 0; |
| 50 | sub sigint_handler { |
| 51 | my $current_time = time; |
| 52 | if ($current_time - 2 > $sigint_received) { |
| 53 | print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; |
| 54 | $sigint_report = 1; |
| 55 | } else { |
| 56 | if (!$sigint_exit) { |
| 57 | print "Second SIGINT received quickly, exiting\n"; |
| 58 | } |
| 59 | $sigint_exit++; |
| 60 | } |
| 61 | |
| 62 | if ($sigint_exit > 3) { |
| 63 | print "Many SIGINTs received, exiting now without report\n"; |
| 64 | exit; |
| 65 | } |
| 66 | |
| 67 | $sigint_received = $current_time; |
| 68 | $sigint_pending = 1; |
| 69 | } |
| 70 | $SIG{INT} = "sigint_handler"; |
| 71 | |
| 72 | # Parse command line options |
| 73 | GetOptions( |
| 74 | 'ignore-pid' => \$opt_ignorepid, |
| 75 | 'read-procstat' => \$opt_read_procstat, |
| 76 | 'prepend-parent' => \$opt_prepend_parent, |
| 77 | ); |
| 78 | |
| 79 | # Defaults for dynamically discovered regex's |
| 80 | my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])'; |
| 81 | |
| 82 | # Dyanically discovered regex |
| 83 | my $regex_fragdetails; |
| 84 | |
| 85 | # Static regex used. Specified like this for readability and for use with /o |
| 86 | # (process_pid) (cpus ) ( time ) (tpoint ) (details) |
| 87 | my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; |
| 88 | my $regex_statname = '[-0-9]*\s\((.*)\).*'; |
| 89 | my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; |
| 90 | |
| 91 | sub generate_traceevent_regex { |
| 92 | my $event = shift; |
| 93 | my $default = shift; |
| 94 | my $regex; |
| 95 | |
| 96 | # Read the event format or use the default |
| 97 | if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { |
| 98 | $regex = $default; |
| 99 | } else { |
| 100 | my $line; |
| 101 | while (!eof(FORMAT)) { |
| 102 | $line = <FORMAT>; |
| 103 | if ($line =~ /^print fmt:\s"(.*)",.*/) { |
| 104 | $regex = $1; |
| 105 | $regex =~ s/%p/\([0-9a-f]*\)/g; |
| 106 | $regex =~ s/%d/\([-0-9]*\)/g; |
| 107 | $regex =~ s/%lu/\([0-9]*\)/g; |
| 108 | } |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | # Verify fields are in the right order |
| 113 | my $tuple; |
| 114 | foreach $tuple (split /\s/, $regex) { |
| 115 | my ($key, $value) = split(/=/, $tuple); |
| 116 | my $expected = shift; |
| 117 | if ($key ne $expected) { |
| 118 | print("WARNING: Format not as expected '$key' != '$expected'"); |
| 119 | $regex =~ s/$key=\((.*)\)/$key=$1/; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | if (defined shift) { |
| 124 | die("Fewer fields than expected in format"); |
| 125 | } |
| 126 | |
| 127 | return $regex; |
| 128 | } |
| 129 | $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag", |
| 130 | $regex_fragdetails_default, |
| 131 | "page", "pfn", |
| 132 | "alloc_order", "fallback_order", "pageblock_order", |
| 133 | "alloc_migratetype", "fallback_migratetype", |
| 134 | "fragmenting", "change_ownership"); |
| 135 | |
| 136 | sub read_statline($) { |
| 137 | my $pid = $_[0]; |
| 138 | my $statline; |
| 139 | |
| 140 | if (open(STAT, "/proc/$pid/stat")) { |
| 141 | $statline = <STAT>; |
| 142 | close(STAT); |
| 143 | } |
| 144 | |
| 145 | if ($statline eq '') { |
| 146 | $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; |
| 147 | } |
| 148 | |
| 149 | return $statline; |
| 150 | } |
| 151 | |
| 152 | sub guess_process_pid($$) { |
| 153 | my $pid = $_[0]; |
| 154 | my $statline = $_[1]; |
| 155 | |
| 156 | if ($pid == 0) { |
| 157 | return "swapper-0"; |
| 158 | } |
| 159 | |
| 160 | if ($statline !~ /$regex_statname/o) { |
| 161 | die("Failed to math stat line for process name :: $statline"); |
| 162 | } |
| 163 | return "$1-$pid"; |
| 164 | } |
| 165 | |
| 166 | sub parent_info($$) { |
| 167 | my $pid = $_[0]; |
| 168 | my $statline = $_[1]; |
| 169 | my $ppid; |
| 170 | |
| 171 | if ($pid == 0) { |
| 172 | return "NOPARENT-0"; |
| 173 | } |
| 174 | |
| 175 | if ($statline !~ /$regex_statppid/o) { |
| 176 | die("Failed to match stat line process ppid:: $statline"); |
| 177 | } |
| 178 | |
| 179 | # Read the ppid stat line |
| 180 | $ppid = $1; |
| 181 | return guess_process_pid($ppid, read_statline($ppid)); |
| 182 | } |
| 183 | |
| 184 | sub process_events { |
| 185 | my $traceevent; |
| 186 | my $process_pid; |
| 187 | my $cpus; |
| 188 | my $timestamp; |
| 189 | my $tracepoint; |
| 190 | my $details; |
| 191 | my $statline; |
| 192 | |
| 193 | # Read each line of the event log |
| 194 | EVENT_PROCESS: |
| 195 | while ($traceevent = <STDIN>) { |
| 196 | if ($traceevent =~ /$regex_traceevent/o) { |
| 197 | $process_pid = $1; |
| 198 | $tracepoint = $4; |
| 199 | |
| 200 | if ($opt_read_procstat || $opt_prepend_parent) { |
| 201 | $process_pid =~ /(.*)-([0-9]*)$/; |
| 202 | my $process = $1; |
| 203 | my $pid = $2; |
| 204 | |
| 205 | $statline = read_statline($pid); |
| 206 | |
| 207 | if ($opt_read_procstat && $process eq '') { |
| 208 | $process_pid = guess_process_pid($pid, $statline); |
| 209 | } |
| 210 | |
| 211 | if ($opt_prepend_parent) { |
| 212 | $process_pid = parent_info($pid, $statline) . " :: $process_pid"; |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | # Unnecessary in this script. Uncomment if required |
| 217 | # $cpus = $2; |
| 218 | # $timestamp = $3; |
| 219 | } else { |
| 220 | next; |
| 221 | } |
| 222 | |
| 223 | # Perl Switch() sucks majorly |
| 224 | if ($tracepoint eq "mm_page_alloc") { |
| 225 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; |
| 226 | } elsif ($tracepoint eq "mm_page_free_direct") { |
| 227 | $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}++; |
| 228 | } elsif ($tracepoint eq "mm_pagevec_free") { |
| 229 | $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}++; |
| 230 | } elsif ($tracepoint eq "mm_page_pcpu_drain") { |
| 231 | $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; |
| 232 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; |
| 233 | } elsif ($tracepoint eq "mm_page_alloc_zone_locked") { |
| 234 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++; |
| 235 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++; |
| 236 | } elsif ($tracepoint eq "mm_page_alloc_extfrag") { |
| 237 | |
| 238 | # Extract the details of the event now |
| 239 | $details = $5; |
| 240 | |
| 241 | my ($page, $pfn); |
| 242 | my ($alloc_order, $fallback_order, $pageblock_order); |
| 243 | my ($alloc_migratetype, $fallback_migratetype); |
| 244 | my ($fragmenting, $change_ownership); |
| 245 | |
| 246 | if ($details !~ /$regex_fragdetails/o) { |
| 247 | print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n"; |
| 248 | next; |
| 249 | } |
| 250 | |
| 251 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++; |
| 252 | $page = $1; |
| 253 | $pfn = $2; |
| 254 | $alloc_order = $3; |
| 255 | $fallback_order = $4; |
| 256 | $pageblock_order = $5; |
| 257 | $alloc_migratetype = $6; |
| 258 | $fallback_migratetype = $7; |
| 259 | $fragmenting = $8; |
| 260 | $change_ownership = $9; |
| 261 | |
| 262 | if ($fragmenting) { |
| 263 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++; |
| 264 | if ($fallback_order <= 3) { |
| 265 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++; |
| 266 | } else { |
| 267 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++; |
| 268 | } |
| 269 | } |
| 270 | if ($change_ownership) { |
| 271 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++; |
| 272 | } |
| 273 | } else { |
| 274 | $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; |
| 275 | } |
| 276 | |
| 277 | # Catch a full pcpu drain event |
| 278 | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} && |
| 279 | $tracepoint ne "mm_page_pcpu_drain") { |
| 280 | |
| 281 | $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++; |
| 282 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; |
| 283 | } |
| 284 | |
| 285 | # Catch a full pcpu refill event |
| 286 | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} && |
| 287 | $tracepoint ne "mm_page_alloc_zone_locked") { |
| 288 | $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++; |
| 289 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; |
| 290 | } |
| 291 | |
| 292 | if ($sigint_pending) { |
| 293 | last EVENT_PROCESS; |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | sub dump_stats { |
| 299 | my $hashref = shift; |
| 300 | my %stats = %$hashref; |
| 301 | |
| 302 | # Dump per-process stats |
| 303 | my $process_pid; |
| 304 | my $max_strlen = 0; |
| 305 | |
| 306 | # Get the maximum process name |
| 307 | foreach $process_pid (keys %perprocesspid) { |
| 308 | my $len = length($process_pid); |
| 309 | if ($len > $max_strlen) { |
| 310 | $max_strlen = $len; |
| 311 | } |
| 312 | } |
| 313 | $max_strlen += 2; |
| 314 | |
| 315 | printf("\n"); |
| 316 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", |
| 317 | "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown"); |
| 318 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", |
| 319 | "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", ""); |
| 320 | |
| 321 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", |
| 322 | "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", ""); |
| 323 | |
| 324 | foreach $process_pid (keys %stats) { |
| 325 | # Dump final aggregates |
| 326 | if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) { |
| 327 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}++; |
| 328 | $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; |
| 329 | } |
| 330 | if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) { |
| 331 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}++; |
| 332 | $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; |
| 333 | } |
| 334 | |
| 335 | printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n", |
| 336 | $process_pid, |
| 337 | $stats{$process_pid}->{MM_PAGE_ALLOC}, |
| 338 | $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, |
| 339 | $stats{$process_pid}->{MM_PAGE_FREE_DIRECT}, |
| 340 | $stats{$process_pid}->{MM_PAGEVEC_FREE}, |
| 341 | $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, |
| 342 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}, |
| 343 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}, |
| 344 | $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}, |
| 345 | $stats{$process_pid}->{HIGH_EXT_FRAG}, |
| 346 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}, |
| 347 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}, |
| 348 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}, |
| 349 | $stats{$process_pid}->{EVENT_UNKNOWN}); |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | sub aggregate_perprocesspid() { |
| 354 | my $process_pid; |
| 355 | my $process; |
| 356 | undef %perprocess; |
| 357 | |
| 358 | foreach $process_pid (keys %perprocesspid) { |
| 359 | $process = $process_pid; |
| 360 | $process =~ s/-([0-9])*$//; |
| 361 | if ($process eq '') { |
| 362 | $process = "NO_PROCESS_NAME"; |
| 363 | } |
| 364 | |
| 365 | $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; |
| 366 | $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; |
| 367 | $perprocess{$process}->{MM_PAGE_FREE_DIRECT} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_DIRECT}; |
| 368 | $perprocess{$process}->{MM_PAGEVEC_FREE} += $perprocesspid{$process_pid}->{MM_PAGEVEC_FREE}; |
| 369 | $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; |
| 370 | $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; |
| 371 | $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; |
| 372 | $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}; |
| 373 | $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}; |
| 374 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}; |
| 375 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}; |
| 376 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}; |
| 377 | $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN}; |
| 378 | } |
| 379 | } |
| 380 | |
| 381 | sub report() { |
| 382 | if (!$opt_ignorepid) { |
| 383 | dump_stats(\%perprocesspid); |
| 384 | } else { |
| 385 | aggregate_perprocesspid(); |
| 386 | dump_stats(\%perprocess); |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | # Process events or signals until neither is available |
| 391 | sub signal_loop() { |
| 392 | my $sigint_processed; |
| 393 | do { |
| 394 | $sigint_processed = 0; |
| 395 | process_events(); |
| 396 | |
| 397 | # Handle pending signals if any |
| 398 | if ($sigint_pending) { |
| 399 | my $current_time = time; |
| 400 | |
| 401 | if ($sigint_exit) { |
| 402 | print "Received exit signal\n"; |
| 403 | $sigint_pending = 0; |
| 404 | } |
| 405 | if ($sigint_report) { |
| 406 | if ($current_time >= $sigint_received + 2) { |
| 407 | report(); |
| 408 | $sigint_report = 0; |
| 409 | $sigint_pending = 0; |
| 410 | $sigint_processed = 1; |
| 411 | } |
| 412 | } |
| 413 | } |
| 414 | } while ($sigint_pending || $sigint_processed); |
| 415 | } |
| 416 | |
| 417 | signal_loop(); |
| 418 | report(); |