123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- #!/usr/bin/perl
- # This is a POC (proof of concept or piece of crap, take your pick) for reading the
- # text representation of trace output related to page allocation. It makes an attempt
- # to extract some high-level information on what is going on. The accuracy of the parser
- # may vary considerably
- #
- # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
- # other options
- # --prepend-parent Report on the parent proc and PID
- # --read-procstat If the trace lacks process info, get it from /proc
- # --ignore-pid Aggregate processes of the same name together
- #
- # Copyright (c) IBM Corporation 2009
- # Author: Mel Gorman <mel@csn.ul.ie>
- use strict;
- use Getopt::Long;
# Symbolic names for everything this script counts. Each name is bound to a
# distinct small integer.
#
# NOTE: inside a hash subscript such as ->{MM_PAGE_ALLOC} Perl quotes a simple
# bareword, so counters written that way are keyed by the *name* of the
# constant rather than by the numeric value assigned here.
use constant {
    # Tracepoint events
    MM_PAGE_ALLOC              => 1,
    MM_PAGE_FREE               => 2,
    MM_PAGE_FREE_BATCHED       => 3,
    MM_PAGE_PCPU_DRAIN         => 4,
    MM_PAGE_ALLOC_ZONE_LOCKED  => 5,
    MM_PAGE_ALLOC_EXTFRAG      => 6,
    EVENT_UNKNOWN              => 7,

    # Constants used to track state
    STATE_PCPU_PAGES_DRAINED   => 8,
    STATE_PCPU_PAGES_REFILLED  => 9,

    # High-level events extrapolated from tracepoints
    HIGH_PCPU_DRAINS           => 10,
    HIGH_PCPU_REFILLS          => 11,
    HIGH_EXT_FRAGMENT          => 12,
    HIGH_EXT_FRAGMENT_SEVERE   => 13,
    HIGH_EXT_FRAGMENT_MODERATE => 14,
    HIGH_EXT_FRAGMENT_CHANGED  => 15,
};
# Event counters: %perprocesspid is keyed by the process identifier taken from
# the trace, %perprocess holds the same counters folded together per process
# name (used with --ignore-pid).
my (%perprocesspid, %perprocess);

# Command line switches, filled in by GetOptions
my ($opt_ignorepid, $opt_read_procstat, $opt_prepend_parent);

# Catch sigint and exit on request. These four values are shared between the
# SIGINT handler and the main processing loop.
my ($sigint_report, $sigint_exit, $sigint_pending, $sigint_received) = (0, 0, 0, 0);
# SIGINT handler.
#
# A SIGINT arriving more than two seconds after the previous one requests a
# report ($sigint_report). One arriving sooner is counted in $sigint_exit
# instead, and once that count exceeds three the script exits on the spot.
# Every invocation records its arrival time in $sigint_received and raises
# $sigint_pending for the main loop.
sub sigint_handler {
    my $now = time;

    if ($now - 2 <= $sigint_received) {
        # Quick repeat: announce it only the first time round
        print "Second SIGINT received quickly, exiting\n" unless $sigint_exit;
        $sigint_exit += 1;
    }
    else {
        print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
        $sigint_report = 1;
    }

    if ($sigint_exit > 3) {
        print "Many SIGINTs received, exiting now without report\n";
        exit;
    }

    ($sigint_received, $sigint_pending) = ($now, 1);
}
# Route ctrl-c to sigint_handler
$SIG{INT} = \&sigint_handler;

# Parse command line options; each switch is a plain flag stored in the
# matching $opt_* variable
GetOptions(
    'prepend-parent' => \$opt_prepend_parent,
    'read-procstat'  => \$opt_read_procstat,
    'ignore-pid'     => \$opt_ignorepid,
);
# Default for the dynamically discovered regex: one "key=(capture)" tuple per
# field of the mm_page_alloc_extfrag event, separated by single spaces.
my $regex_fragdetails_default = join(' ',
    'page=([0-9a-f]*)',
    'pfn=([0-9]*)',
    'alloc_order=([-0-9]*)',
    'fallback_order=([-0-9]*)',
    'pageblock_order=([-0-9]*)',
    'alloc_migratetype=([-0-9]*)',
    'fallback_migratetype=([-0-9]*)',
    'fragmenting=([-0-9])',
    'change_ownership=([-0-9])',
);

# Dynamically discovered regex
my $regex_fragdetails;

# Static regexes. Kept as strings for readability and for use with /o.
my $regex_traceevent = join('',
    '\s*([a-zA-Z0-9-]*)',    # $1 process_pid
    '\s*(\[[0-9]*\])',       # $2 cpus
    '\s*([0-9.]*):',         # $3 time
    '\s*([a-zA-Z_]*):',      # $4 tracepoint
    '\s*(.*)',               # $5 details
);

# Both stat-line patterns are whitespace-separated fields
my $regex_statname = join('\s',
    '[-0-9]*',               # leading number
    '\((.*)\).*',            # $1 = text between the parentheses
);
my $regex_statppid = join('\s',
    '[-0-9]*',               # leading number
    '\(.*\)',                # parenthesised text
    '[A-Za-z]',              # single letter
    '([0-9]*).*',            # $1 = the number that follows
);
# Build the regex used to parse the details of one trace event.
#
# Arguments:
#   $event         event path below /sys/kernel/debug/tracing/events/
#   $default       regex string used when no usable format can be read
#   @expected_keys field names, in the order the caller expects them
#
# The "print fmt" line of the event's format file is turned into a regex by
# replacing %p, %d and %lu with capturing groups. If the file cannot be opened,
# or holds no "print fmt" line, $default is used instead.
#
# Each whitespace-separated "key=..." tuple is then compared against the
# expected field names. A tuple with an unexpected key triggers a warning and
# loses its capturing parentheses. Dies if expected field names are left over
# once all tuples have been checked.
#
# Returns the regex as a string.
sub generate_traceevent_regex {
    my ($event, $default, @expected_keys) = @_;
    my $regex;

    # Read the event format or use the default
    if (open(my $format_fh, '<', "/sys/kernel/debug/tracing/events/$event/format")) {
        while (my $line = <$format_fh>) {
            if ($line =~ /^print fmt:\s"(.*)",.*/) {
                $regex = $1;
                $regex =~ s/%p/\([0-9a-f]*\)/g;
                $regex =~ s/%d/\([-0-9]*\)/g;
                $regex =~ s/%lu/\([0-9]*\)/g;
            }
        }
        close($format_fh);
    }
    # Covers both an unreadable file and a file without a "print fmt" line
    $regex = $default if !defined $regex;

    # Verify fields are in the right order
    foreach my $tuple (split /\s/, $regex) {
        my ($key) = split(/=/, $tuple);
        my $expected = shift @expected_keys;
        if ($key ne $expected) {
            print("WARNING: Format not as expected '$key' != '$expected'\n");
            # Drop the capture of this one field only: [^)]* stops at the
            # first closing parenthesis so later fields keep their groups.
            $regex =~ s/\Q$key\E=\(([^)]*)\)/$key=$1/;
        }
    }

    if (defined shift @expected_keys) {
        die("Fewer fields than expected in format");
    }

    return $regex;
}
# Build the extfrag details regex from the live event format, falling back to
# the built-in default. The trailing list names the fields in expected order.
$regex_fragdetails = generate_traceevent_regex(
    "kmem/mm_page_alloc_extfrag",
    $regex_fragdetails_default,
    qw(
        page pfn
        alloc_order fallback_order pageblock_order
        alloc_migratetype fallback_migratetype
        fragmenting change_ownership
    ),
);
# Return the first line of /proc/$pid/stat.
#
# When the file cannot be opened, or yields no data, a placeholder line
# "-1 (UNKNOWN_PROCESS_NAME) R 0" is returned instead so callers always have
# something to match against.
sub read_statline($) {
    my ($pid) = @_;
    my $statline;

    # Lexical handle and 3-arg open: nothing leaks into a global bareword
    # handle and the path is never interpreted as an open mode.
    if (open(my $stat_fh, '<', "/proc/$pid/stat")) {
        $statline = <$stat_fh>;
        close($stat_fh);
    }

    if (!defined $statline || $statline eq '') {
        $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
    }

    return $statline;
}
# Build a "name-pid" identifier from a pid and a stat line.
#
# Arguments:
#   $pid       process id
#   $statline  stat line to take the process name from
#
# pid 0 is always reported as "swapper-0". Otherwise the name is whatever
# $regex_statname captures from $statline. Dies if the stat line does not
# match.
sub guess_process_pid($$) {
    my ($pid, $statline) = @_;

    if ($pid == 0) {
        return "swapper-0";
    }

    if ($statline !~ /$regex_statname/o) {
        die("Failed to match stat line for process name :: $statline");
    }
    return "$1-$pid";
}
# Describe the parent of a process as "name-pid".
#
# Arguments:
#   $pid       process id of the child
#   $statline  stat line of the child
#
# pid 0 yields "NOPARENT-0". Otherwise the parent pid is whatever
# $regex_statppid captures from $statline; the parent's own stat line is then
# read and handed to guess_process_pid(). Dies if $statline does not match.
sub parent_info($$) {
    my ($pid, $statline) = @_;

    return "NOPARENT-0" if $pid == 0;

    die("Failed to match stat line process ppid:: $statline")
        unless $statline =~ /$regex_statppid/o;

    # Read the ppid stat line
    my $ppid = $1;
    return guess_process_pid($ppid, read_statline($ppid));
}
# Read trace events from STDIN and update the counters in %perprocesspid.
#
# Lines that do not match $regex_traceevent are skipped. For every other line
# the counter named after the tracepoint is incremented for the owning
# process; tracepoints not handled below are counted as EVENT_UNKNOWN.
#
# With --read-procstat and/or --prepend-parent the process identifier is
# completed from /proc: a missing process name is looked up, and the parent's
# "name-pid" is prepended as "parent :: child".
#
# Runs of mm_page_pcpu_drain / mm_page_alloc_zone_locked events are tracked
# in STATE_PCPU_PAGES_*; the first different event from the same process ends
# the run and counts one HIGH_PCPU_DRAINS / HIGH_PCPU_REFILLS.
#
# Returns when STDIN yields no more lines, or after the event during which
# $sigint_pending was found set.
sub process_events {
    # Read each line of the event log
    EVENT_PROCESS:
    while (my $traceevent = <STDIN>) {
        if ($traceevent !~ /$regex_traceevent/o) {
            next EVENT_PROCESS;
        }

        # Copy the captures out immediately so later matches cannot disturb
        # them. $2 (cpus) and $3 (timestamp) are not needed by this script.
        my ($process_pid, $tracepoint, $details) = ($1, $4, $5);

        if ($opt_read_procstat || $opt_prepend_parent) {
            # Only consult /proc when the identifier really splits into
            # "name-pid"; otherwise it is left exactly as traced rather than
            # being looked up under a bogus pid.
            if ($process_pid =~ /(.*)-([0-9]*)$/) {
                my ($process, $pid) = ($1, $2);
                my $statline = read_statline($pid);

                if ($opt_read_procstat && $process eq '') {
                    $process_pid = guess_process_pid($pid, $statline);
                }

                if ($opt_prepend_parent) {
                    $process_pid = parent_info($pid, $statline) . " :: $process_pid";
                }
            }
        }

        # Dispatch on the tracepoint name
        if ($tracepoint eq "mm_page_alloc") {
            $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
        } elsif ($tracepoint eq "mm_page_free") {
            $perprocesspid{$process_pid}->{MM_PAGE_FREE}++;
        } elsif ($tracepoint eq "mm_page_free_batched") {
            $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
        } elsif ($tracepoint eq "mm_page_pcpu_drain") {
            $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
            $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
        } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
            $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
            $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
        } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
            # Extract the details of the event now
            if ($details !~ /$regex_fragdetails/o) {
                print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
                next EVENT_PROCESS;
            }
            $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;

            # Capture order: page, pfn, alloc_order, fallback_order,
            # pageblock_order, alloc_migratetype, fallback_migratetype,
            # fragmenting, change_ownership. Only three are used here.
            my ($fallback_order, $fragmenting, $change_ownership) = ($4, $8, $9);

            if ($fragmenting) {
                $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
                if ($fallback_order <= 3) {
                    $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
                } else {
                    $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
                }
            }
            if ($change_ownership) {
                $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
            }
        } else {
            $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
        }

        # Catch a full pcpu drain event
        if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
                $tracepoint ne "mm_page_pcpu_drain") {
            $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
            $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
        }

        # Catch a full pcpu refill event
        if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
                $tracepoint ne "mm_page_alloc_zone_locked") {
            $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
            $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
        }

        # Hand control back so the pending SIGINT can be acted upon
        if ($sigint_pending) {
            last EVENT_PROCESS;
        }
    }
}
# Print a table of counters, one row per key of the given hash.
#
# Argument:
#   $hashref  reference to a hash mapping a process identifier to a hash of
#             counters
#
# The first column is sized from the keys of the table being printed, so it
# fits both per-pid identifiers and aggregated names such as NO_PROCESS_NAME.
#
# Before a row is printed, a still-open drain/refill run (STATE_PCPU_PAGES_*)
# is counted as one more HIGH_PCPU_DRAINS / HIGH_PCPU_REFILLS and the state is
# reset. %stats is only a shallow copy, so this updates the caller's
# per-process hashes as well.
sub dump_stats {
    my $hashref = shift;
    my %stats = %$hashref;

    # Get the maximum process name
    my $max_strlen = 0;
    foreach my $name (keys %stats) {
        my $len = length($name);
        $max_strlen = $len if $len > $max_strlen;
    }
    $max_strlen += 2;

    # One name column, then thirteen value columns (the second is wider)
    my $name_column = "%-" . $max_strlen . "s";
    my $header_format = $name_column . " %8s %10s" . (" %8s" x 11) . "\n";
    my $row_format    = $name_column . " %8d %10d" . (" %8d" x 11) . "\n";

    printf("\n");
    printf($header_format,
        "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
    printf($header_format,
        "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
    printf($header_format,
        "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");

    foreach my $process_pid (keys %stats) {
        my $counters = $stats{$process_pid};

        # Dump final aggregates
        if ($counters->{STATE_PCPU_PAGES_DRAINED}) {
            $counters->{HIGH_PCPU_DRAINS}++;
            $counters->{STATE_PCPU_PAGES_DRAINED} = 0;
        }
        if ($counters->{STATE_PCPU_PAGES_REFILLED}) {
            $counters->{HIGH_PCPU_REFILLS}++;
            $counters->{STATE_PCPU_PAGES_REFILLED} = 0;
        }

        printf($row_format,
            $process_pid,
            $counters->{MM_PAGE_ALLOC},
            $counters->{MM_PAGE_ALLOC_ZONE_LOCKED},
            $counters->{MM_PAGE_FREE},
            $counters->{MM_PAGE_FREE_BATCHED},
            $counters->{MM_PAGE_PCPU_DRAIN},
            $counters->{HIGH_PCPU_DRAINS},
            $counters->{HIGH_PCPU_REFILLS},
            $counters->{MM_PAGE_ALLOC_EXTFRAG},
            $counters->{HIGH_EXT_FRAG},
            $counters->{HIGH_EXT_FRAGMENT_CHANGED},
            $counters->{HIGH_EXT_FRAGMENT_SEVERE},
            $counters->{HIGH_EXT_FRAGMENT_MODERATE},
            $counters->{EVENT_UNKNOWN});
    }
}
# Rebuild %perprocess by folding %perprocesspid together per process name.
#
# The process name is the key of %perprocesspid with a trailing "-<digits>"
# removed; an empty result is filed under "NO_PROCESS_NAME". All reported
# counters are summed. A drain/refill run that is still open for a process
# (STATE_PCPU_PAGES_*) is counted as one more HIGH_PCPU_DRAINS /
# HIGH_PCPU_REFILLS in the aggregate, matching what the per-pid report shows.
# %perprocesspid itself is left unmodified.
sub aggregate_perprocesspid() {
    my @summed_counters = qw(
        MM_PAGE_ALLOC
        MM_PAGE_ALLOC_ZONE_LOCKED
        MM_PAGE_FREE
        MM_PAGE_FREE_BATCHED
        MM_PAGE_PCPU_DRAIN
        HIGH_PCPU_DRAINS
        HIGH_PCPU_REFILLS
        MM_PAGE_ALLOC_EXTFRAG
        HIGH_EXT_FRAG
        HIGH_EXT_FRAGMENT_CHANGED
        HIGH_EXT_FRAGMENT_SEVERE
        HIGH_EXT_FRAGMENT_MODERATE
        EVENT_UNKNOWN
    );

    undef %perprocess;

    foreach my $process_pid (keys %perprocesspid) {
        my $process = $process_pid;
        $process =~ s/-([0-9])*$//;
        if ($process eq '') {
            $process = "NO_PROCESS_NAME";
        }

        my $source = $perprocesspid{$process_pid};
        foreach my $counter (@summed_counters) {
            $perprocess{$process}->{$counter} += $source->{$counter};
        }

        # Account for runs that have not been closed by a later event yet
        if ($source->{STATE_PCPU_PAGES_DRAINED}) {
            $perprocess{$process}->{HIGH_PCPU_DRAINS}++;
        }
        if ($source->{STATE_PCPU_PAGES_REFILLED}) {
            $perprocess{$process}->{HIGH_PCPU_REFILLS}++;
        }
    }
}
# Print the statistics table: aggregated per process name when --ignore-pid
# was given, otherwise one row per entry of %perprocesspid.
sub report() {
    if ($opt_ignorepid) {
        aggregate_perprocesspid();
        dump_stats(\%perprocess);
    }
    else {
        dump_stats(\%perprocesspid);
    }
}
# Process events or signals until neither is available.
#
# Each pass consumes events via process_events() and then looks at the SIGINT
# state: an exit request clears the pending flag, and a report request is
# honoured once at least two seconds have passed since the last SIGINT. The
# loop repeats while a signal is still pending or a report was just printed.
sub signal_loop() {
    while (1) {
        my $report_printed = 0;

        process_events();

        # Handle pending signals if any
        if ($sigint_pending) {
            my $now = time;

            if ($sigint_exit) {
                print "Received exit signal\n";
                $sigint_pending = 0;
            }

            if ($sigint_report && $now >= $sigint_received + 2) {
                report();
                $sigint_report  = 0;
                $sigint_pending = 0;
                $report_printed = 1;
            }
        }

        last unless $sigint_pending || $report_printed;
    }
}
# Main entry point: consume events until input and signals are exhausted,
# then always print a final report.
signal_loop();
report();
|