blob: 0a120aae33ce5c9836dad948b18736ea183e6801 [file] [log] [blame]
Mel Gormanc9d05cf2009-09-21 17:02:47 -07001#!/usr/bin/perl
2# This is a POC (proof of concept or piece of crap, take your pick) for reading the
3# text representation of trace output related to page allocation. It makes an attempt
4# to extract some high-level information on what is going on. The accuracy of the parser
5# may vary considerably
6#
7# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
8# other options
9# --prepend-parent Report on the parent proc and PID
10# --read-procstat If the trace lacks process info, get it from /proc
11# --ignore-pid Aggregate processes of the same name together
12#
13# Copyright (c) IBM Corporation 2009
14# Author: Mel Gorman <mel@csn.ul.ie>
15use strict;
16use Getopt::Long;
17
18# Tracepoint events
19use constant MM_PAGE_ALLOC => 1;
Konstantin Khlebnikovb413d482012-01-10 15:07:09 -080020use constant MM_PAGE_FREE => 2;
21use constant MM_PAGE_FREE_BATCHED => 3;
Mel Gormanc9d05cf2009-09-21 17:02:47 -070022use constant MM_PAGE_PCPU_DRAIN => 4;
23use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5;
24use constant MM_PAGE_ALLOC_EXTFRAG => 6;
25use constant EVENT_UNKNOWN => 7;
26
27# Constants used to track state
28use constant STATE_PCPU_PAGES_DRAINED => 8;
29use constant STATE_PCPU_PAGES_REFILLED => 9;
30
31# High-level events extrapolated from tracepoints
32use constant HIGH_PCPU_DRAINS => 10;
33use constant HIGH_PCPU_REFILLS => 11;
34use constant HIGH_EXT_FRAGMENT => 12;
35use constant HIGH_EXT_FRAGMENT_SEVERE => 13;
36use constant HIGH_EXT_FRAGMENT_MODERATE => 14;
37use constant HIGH_EXT_FRAGMENT_CHANGED => 15;
38
39my %perprocesspid;
40my %perprocess;
41my $opt_ignorepid;
42my $opt_read_procstat;
43my $opt_prepend_parent;
44
45# Catch sigint and exit on request
46my $sigint_report = 0;
47my $sigint_exit = 0;
48my $sigint_pending = 0;
49my $sigint_received = 0;
50sub sigint_handler {
51 my $current_time = time;
52 if ($current_time - 2 > $sigint_received) {
53 print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
54 $sigint_report = 1;
55 } else {
56 if (!$sigint_exit) {
57 print "Second SIGINT received quickly, exiting\n";
58 }
59 $sigint_exit++;
60 }
61
62 if ($sigint_exit > 3) {
63 print "Many SIGINTs received, exiting now without report\n";
64 exit;
65 }
66
67 $sigint_received = $current_time;
68 $sigint_pending = 1;
69}
70$SIG{INT} = "sigint_handler";
71
72# Parse command line options
73GetOptions(
74 'ignore-pid' => \$opt_ignorepid,
75 'read-procstat' => \$opt_read_procstat,
76 'prepend-parent' => \$opt_prepend_parent,
77);
78
79# Defaults for dynamically discovered regex's
80my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
81
82# Dyanically discovered regex
83my $regex_fragdetails;
84
85# Static regex used. Specified like this for readability and for use with /o
86# (process_pid) (cpus ) ( time ) (tpoint ) (details)
87my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
88my $regex_statname = '[-0-9]*\s\((.*)\).*';
89my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
90
91sub generate_traceevent_regex {
92 my $event = shift;
93 my $default = shift;
94 my $regex;
95
96 # Read the event format or use the default
97 if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
98 $regex = $default;
99 } else {
100 my $line;
101 while (!eof(FORMAT)) {
102 $line = <FORMAT>;
103 if ($line =~ /^print fmt:\s"(.*)",.*/) {
104 $regex = $1;
105 $regex =~ s/%p/\([0-9a-f]*\)/g;
106 $regex =~ s/%d/\([-0-9]*\)/g;
107 $regex =~ s/%lu/\([0-9]*\)/g;
108 }
109 }
110 }
111
112 # Verify fields are in the right order
113 my $tuple;
114 foreach $tuple (split /\s/, $regex) {
115 my ($key, $value) = split(/=/, $tuple);
116 my $expected = shift;
117 if ($key ne $expected) {
118 print("WARNING: Format not as expected '$key' != '$expected'");
119 $regex =~ s/$key=\((.*)\)/$key=$1/;
120 }
121 }
122
123 if (defined shift) {
124 die("Fewer fields than expected in format");
125 }
126
127 return $regex;
128}
129$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
130 $regex_fragdetails_default,
131 "page", "pfn",
132 "alloc_order", "fallback_order", "pageblock_order",
133 "alloc_migratetype", "fallback_migratetype",
134 "fragmenting", "change_ownership");
135
136sub read_statline($) {
137 my $pid = $_[0];
138 my $statline;
139
140 if (open(STAT, "/proc/$pid/stat")) {
141 $statline = <STAT>;
142 close(STAT);
143 }
144
145 if ($statline eq '') {
146 $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
147 }
148
149 return $statline;
150}
151
152sub guess_process_pid($$) {
153 my $pid = $_[0];
154 my $statline = $_[1];
155
156 if ($pid == 0) {
157 return "swapper-0";
158 }
159
160 if ($statline !~ /$regex_statname/o) {
161 die("Failed to math stat line for process name :: $statline");
162 }
163 return "$1-$pid";
164}
165
166sub parent_info($$) {
167 my $pid = $_[0];
168 my $statline = $_[1];
169 my $ppid;
170
171 if ($pid == 0) {
172 return "NOPARENT-0";
173 }
174
175 if ($statline !~ /$regex_statppid/o) {
176 die("Failed to match stat line process ppid:: $statline");
177 }
178
179 # Read the ppid stat line
180 $ppid = $1;
181 return guess_process_pid($ppid, read_statline($ppid));
182}
183
184sub process_events {
185 my $traceevent;
186 my $process_pid;
187 my $cpus;
188 my $timestamp;
189 my $tracepoint;
190 my $details;
191 my $statline;
192
193 # Read each line of the event log
194EVENT_PROCESS:
195 while ($traceevent = <STDIN>) {
196 if ($traceevent =~ /$regex_traceevent/o) {
197 $process_pid = $1;
198 $tracepoint = $4;
199
200 if ($opt_read_procstat || $opt_prepend_parent) {
201 $process_pid =~ /(.*)-([0-9]*)$/;
202 my $process = $1;
203 my $pid = $2;
204
205 $statline = read_statline($pid);
206
207 if ($opt_read_procstat && $process eq '') {
208 $process_pid = guess_process_pid($pid, $statline);
209 }
210
211 if ($opt_prepend_parent) {
212 $process_pid = parent_info($pid, $statline) . " :: $process_pid";
213 }
214 }
215
216 # Unnecessary in this script. Uncomment if required
217 # $cpus = $2;
218 # $timestamp = $3;
219 } else {
220 next;
221 }
222
223 # Perl Switch() sucks majorly
224 if ($tracepoint eq "mm_page_alloc") {
225 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
Konstantin Khlebnikovb413d482012-01-10 15:07:09 -0800226 } elsif ($tracepoint eq "mm_page_free") {
227 $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
228 } elsif ($tracepoint eq "mm_page_free_batched") {
229 $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
Mel Gormanc9d05cf2009-09-21 17:02:47 -0700230 } elsif ($tracepoint eq "mm_page_pcpu_drain") {
231 $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
232 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
233 } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
234 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
235 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
236 } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
237
238 # Extract the details of the event now
239 $details = $5;
240
241 my ($page, $pfn);
242 my ($alloc_order, $fallback_order, $pageblock_order);
243 my ($alloc_migratetype, $fallback_migratetype);
244 my ($fragmenting, $change_ownership);
245
246 if ($details !~ /$regex_fragdetails/o) {
247 print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
248 next;
249 }
250
251 $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
252 $page = $1;
253 $pfn = $2;
254 $alloc_order = $3;
255 $fallback_order = $4;
256 $pageblock_order = $5;
257 $alloc_migratetype = $6;
258 $fallback_migratetype = $7;
259 $fragmenting = $8;
260 $change_ownership = $9;
261
262 if ($fragmenting) {
263 $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
264 if ($fallback_order <= 3) {
265 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
266 } else {
267 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
268 }
269 }
270 if ($change_ownership) {
271 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
272 }
273 } else {
274 $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
275 }
276
277 # Catch a full pcpu drain event
278 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
279 $tracepoint ne "mm_page_pcpu_drain") {
280
281 $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
282 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
283 }
284
285 # Catch a full pcpu refill event
286 if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
287 $tracepoint ne "mm_page_alloc_zone_locked") {
288 $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
289 $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
290 }
291
292 if ($sigint_pending) {
293 last EVENT_PROCESS;
294 }
295 }
296}
297
298sub dump_stats {
299 my $hashref = shift;
300 my %stats = %$hashref;
301
302 # Dump per-process stats
303 my $process_pid;
304 my $max_strlen = 0;
305
306 # Get the maximum process name
307 foreach $process_pid (keys %perprocesspid) {
308 my $len = length($process_pid);
309 if ($len > $max_strlen) {
310 $max_strlen = $len;
311 }
312 }
313 $max_strlen += 2;
314
315 printf("\n");
316 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
317 "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
318 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
319 "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
320
321 printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
322 "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
323
324 foreach $process_pid (keys %stats) {
325 # Dump final aggregates
326 if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
327 $stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
328 $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
329 }
330 if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
331 $stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
332 $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
333 }
334
335 printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n",
336 $process_pid,
337 $stats{$process_pid}->{MM_PAGE_ALLOC},
338 $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
Konstantin Khlebnikovb413d482012-01-10 15:07:09 -0800339 $stats{$process_pid}->{MM_PAGE_FREE},
340 $stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
Mel Gormanc9d05cf2009-09-21 17:02:47 -0700341 $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
342 $stats{$process_pid}->{HIGH_PCPU_DRAINS},
343 $stats{$process_pid}->{HIGH_PCPU_REFILLS},
344 $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
345 $stats{$process_pid}->{HIGH_EXT_FRAG},
346 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
347 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
348 $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
349 $stats{$process_pid}->{EVENT_UNKNOWN});
350 }
351}
352
353sub aggregate_perprocesspid() {
354 my $process_pid;
355 my $process;
356 undef %perprocess;
357
358 foreach $process_pid (keys %perprocesspid) {
359 $process = $process_pid;
360 $process =~ s/-([0-9])*$//;
361 if ($process eq '') {
362 $process = "NO_PROCESS_NAME";
363 }
364
365 $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
366 $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
Konstantin Khlebnikovb413d482012-01-10 15:07:09 -0800367 $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE};
368 $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED};
Mel Gormanc9d05cf2009-09-21 17:02:47 -0700369 $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
370 $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
371 $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
372 $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
373 $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
374 $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
375 $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
376 $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
377 $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
378 }
379}
380
381sub report() {
382 if (!$opt_ignorepid) {
383 dump_stats(\%perprocesspid);
384 } else {
385 aggregate_perprocesspid();
386 dump_stats(\%perprocess);
387 }
388}
389
390# Process events or signals until neither is available
391sub signal_loop() {
392 my $sigint_processed;
393 do {
394 $sigint_processed = 0;
395 process_events();
396
397 # Handle pending signals if any
398 if ($sigint_pending) {
399 my $current_time = time;
400
401 if ($sigint_exit) {
402 print "Received exit signal\n";
403 $sigint_pending = 0;
404 }
405 if ($sigint_report) {
406 if ($current_time >= $sigint_received + 2) {
407 report();
408 $sigint_report = 0;
409 $sigint_pending = 0;
410 $sigint_processed = 1;
411 }
412 }
413 }
414 } while ($sigint_pending || $sigint_processed);
415}
416
417signal_loop();
418report();