#!/usr/bin/env perl
# This is a POC (proof of concept or piece of crap, take your pick) for reading the
# text representation of trace output related to page allocation. It makes an attempt
# to extract some high-level information on what is going on. The accuracy of the parser
# may vary considerably
#
# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/tracing/trace_pipe
# other options
# --prepend-parent  Report on the parent proc and PID
# --read-procstat   If the trace lacks process info, get it from /proc
# --ignore-pid      Aggregate processes of the same name together
#
# Copyright (c) IBM Corporation 2009
# Author: Mel Gorman <mel@csn.ul.ie>
use strict;
use Getopt::Long;
use List::Util qw(max);
# NOTE: the counters in this script are stored with bareword hash subscripts
# such as {MM_PAGE_ALLOC}. Perl quotes a bare identifier inside hash braces,
# so those keys are the strings 'MM_PAGE_ALLOC' etc., not the numeric values
# declared below.

# Tracepoint events
use constant {
    MM_PAGE_ALLOC             => 1,
    MM_PAGE_FREE              => 2,
    MM_PAGE_FREE_BATCHED      => 3,
    MM_PAGE_PCPU_DRAIN        => 4,
    MM_PAGE_ALLOC_ZONE_LOCKED => 5,
    MM_PAGE_ALLOC_EXTFRAG     => 6,
    EVENT_UNKNOWN             => 7,
};

# Constants used to track state
use constant {
    STATE_PCPU_PAGES_DRAINED  => 8,
    STATE_PCPU_PAGES_REFILLED => 9,
};

# High-level events extrapolated from tracepoints
use constant {
    HIGH_PCPU_DRAINS           => 10,
    HIGH_PCPU_REFILLS          => 11,
    HIGH_EXT_FRAGMENT          => 12,
    HIGH_EXT_FRAGMENT_SEVERE   => 13,
    HIGH_EXT_FRAGMENT_MODERATE => 14,
    HIGH_EXT_FRAGMENT_CHANGED  => 15,
};
# Event counters keyed by the "name-pid" string taken from each trace line,
# and the same counters summed per process name (filled in only when
# --ignore-pid is given).
my %perprocesspid;
my %perprocess;

# Command line switches, set by GetOptions
my $opt_ignorepid;
my $opt_read_procstat;
my $opt_prepend_parent;
# Catch sigint and exit on request
#
# State shared between the handler and the main loop:
#   $sigint_report   - a report has been requested
#   $sigint_exit     - number of SIGINTs that arrived within 2 seconds of
#                      the previous one
#   $sigint_pending  - set on every SIGINT so the event loop stops reading
#   $sigint_received - time() of the most recent SIGINT
my $sigint_report = 0;
my $sigint_exit = 0;
my $sigint_pending = 0;
my $sigint_received = 0;

# SIGINT handler. A first (or isolated) ctrl-c requests a report, a second
# one within 2 seconds requests an exit, and more than three quick repeats
# terminate the script immediately without reporting.
sub sigint_handler {
    my $current_time = time;

    if ($current_time - 2 > $sigint_received) {
        print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
        $sigint_report = 1;
    } else {
        # Only announce the exit request once, but keep counting repeats
        if (!$sigint_exit) {
            print "Second SIGINT received quickly, exiting\n";
        }
        $sigint_exit++;
    }

    if ($sigint_exit > 3) {
        print "Many SIGINTs received, exiting now without report\n";
        exit;
    }

    $sigint_received = $current_time;
    $sigint_pending = 1;
}

# Install via a code reference so the handler does not depend on a symbolic
# name lookup at signal time
$SIG{INT} = \&sigint_handler;
# Parse command line options. Each switch is a plain boolean flag stored in
# the matching $opt_* variable.
GetOptions(
    'ignore-pid'     => \$opt_ignorepid,
    'read-procstat'  => \$opt_read_procstat,
    'prepend-parent' => \$opt_prepend_parent,
);
# Defaults for dynamically discovered regex's
my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';

# Dynamically discovered regex
my $regex_fragdetails;

# Static regex used. Specified like this for readability and for use with /o
#                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details)
my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';

# Patterns applied to a /proc/<pid>/stat line: the first captures the process
# name between the parentheses, the second captures the field that follows
# the single-letter state (used as the parent pid).
my $regex_statname = '[-0-9]*\s\((.*)\).*';
my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
# Build the regex used to parse the "details" part of a tracepoint line.
#
# Arguments:
#   $event   - event path below /sys/kernel/tracing/events (e.g. "kmem/foo")
#   $default - regex to use when the event's format file cannot be read or
#              contains no "print fmt" line
#   @fields  - the field names expected in the format, in order
#
# The "print fmt" string of the format file is turned into a regex by
# replacing printf conversions with capture groups. Each whitespace-separated
# "key=value" tuple is then compared against the expected field names; on a
# mismatch a warning is printed and the capture group of that tuple is
# removed from the regex.
#
# Returns the regex as a string. Dies if the format has fewer tuples than
# expected field names.
sub generate_traceevent_regex {
    my ($event, $default, @expected) = @_;
    my $regex;

    # Read the event format or use the default
    if (open(my $format_fh, '<', "/sys/kernel/tracing/events/$event/format")) {
        while (my $line = <$format_fh>) {
            if ($line =~ /^print fmt:\s"(.*)",.*/) {
                $regex = $1;
                $regex =~ s/%p/\([0-9a-f]*\)/g;
                $regex =~ s/%d/\([-0-9]*\)/g;
                $regex =~ s/%lu/\([0-9]*\)/g;
                # printf's %lx emits lower-case hex digits
                $regex =~ s/%lx/\([0-9a-f]*\)/g;
            }
        }
        close($format_fh);
    }

    # Unreadable format file, or one without a "print fmt" line
    $regex = $default unless defined $regex;

    # Verify fields are in the right order. split ' ' ignores leading and
    # repeated whitespace so empty tuples cannot consume expected names.
    foreach my $tuple (split ' ', $regex) {
        my ($key) = split(/=/, $tuple);
        $key = '' unless defined $key;

        my $expected = shift @expected;
        $expected = '' unless defined $expected;

        if ($key ne $expected) {
            print("WARNING: Format not as expected '$key' != '$expected'\n");
            # Drop only this tuple's capture group: the non-greedy match
            # stops at the first closing parenthesis, and \Q..\E keeps any
            # metacharacters in the key literal.
            $regex =~ s/\Q$key\E=\((.*?)\)/$key=$1/;
        }
    }

    if (@expected) {
        die("Fewer fields than expected in format");
    }

    return $regex;
}
# Discover the mm_page_alloc_extfrag details regex from the running kernel,
# falling back to the built-in default. The field names are listed in the
# order the capture groups are consumed when the event is parsed.
$regex_fragdetails = generate_traceevent_regex(
    "kmem/mm_page_alloc_extfrag",
    $regex_fragdetails_default,
    "page", "pfn",
    "alloc_order", "fallback_order", "pageblock_order",
    "alloc_migratetype", "fallback_migratetype",
    "fragmenting", "change_ownership");
# Return the first line of /proc/<pid>/stat for the given pid.
#
# If the file cannot be opened or yields nothing, a placeholder line
# "-1 (UNKNOWN_PROCESS_NAME) R 0" is returned so callers can still apply
# their stat-line regexes.
sub read_statline($) {
    my $pid = $_[0];
    my $statline;

    if (open(my $stat_fh, '<', "/proc/$pid/stat")) {
        $statline = <$stat_fh>;
        close($stat_fh);
    }

    if (!defined $statline || $statline eq '') {
        $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
    }

    return $statline;
}
# Build a "name-pid" label for a process from its /proc stat line.
#
# Arguments: the pid and the stat line read for it.
# Returns "swapper-0" for pid 0, otherwise "<name>-<pid>" where <name> is the
# text captured by $regex_statname. Dies if the stat line does not match.
sub guess_process_pid($$) {
    my $pid = $_[0];
    my $statline = $_[1];

    if ($pid == 0) {
        return "swapper-0";
    }

    # Bind the capture directly so it cannot be confused with a stale $1
    my ($name) = $statline =~ /$regex_statname/o
        or die("Failed to match stat line for process name :: $statline");

    return "$name-$pid";
}
# Build the "name-pid" label of a process's parent.
#
# Arguments: the child's pid and the child's stat line.
# Returns "NOPARENT-0" for pid 0. Otherwise the parent pid is captured from
# the stat line with $regex_statppid, the parent's own stat line is read and
# the label is produced by guess_process_pid. Dies if the child's stat line
# does not match.
sub parent_info($$) {
    my $pid = $_[0];
    my $statline = $_[1];

    if ($pid == 0) {
        return "NOPARENT-0";
    }

    my ($ppid) = $statline =~ /$regex_statppid/o
        or die("Failed to match stat line process ppid:: $statline");

    # Read the ppid stat line
    return guess_process_pid($ppid, read_statline($ppid));
}
# Read trace lines from STDIN and update the counters in %perprocesspid.
#
# Lines that do not match $regex_traceevent are skipped. Each matching line
# is attributed to its "name-pid" label (optionally refined from /proc with
# --read-procstat and prefixed with the parent's label with
# --prepend-parent) and counted by tracepoint name. Runs of
# mm_page_pcpu_drain / mm_page_alloc_zone_locked events are collapsed into a
# single high-level drain / refill when a different event follows.
#
# Returns when STDIN is exhausted or, after any line, once a SIGINT is
# pending.
sub process_events {
    # Read each line of the event log
EVENT_PROCESS:
    while (my $traceevent = <STDIN>) {
        next EVENT_PROCESS unless $traceevent =~ /$regex_traceevent/o;

        # Copy the captures immediately; later matches would replace them.
        # $2 (cpus) and $3 (timestamp) are unnecessary in this script.
        my ($process_pid, $tracepoint, $details) = ($1, $4, $5);

        if ($opt_read_procstat || $opt_prepend_parent) {
            # Without a "-<pid>" suffix there is no pid to look up; pid 0
            # makes the helpers fall back to their swapper/NOPARENT labels.
            my ($process, $pid) = ($process_pid, 0);
            if ($process_pid =~ /(.*)-([0-9]*)$/) {
                ($process, $pid) = ($1, $2);
            }

            my $statline = read_statline($pid);

            if ($opt_read_procstat && $process eq '') {
                $process_pid = guess_process_pid($pid, $statline);
            }

            if ($opt_prepend_parent) {
                $process_pid = parent_info($pid, $statline) . " :: $process_pid";
            }
        }

        # Extract the details of an extfrag event before touching any
        # counter, so an unparsable line leaves no trace in the statistics
        my ($fallback_order, $fragmenting, $change_ownership);
        if ($tracepoint eq "mm_page_alloc_extfrag") {
            if ($details !~ /$regex_fragdetails/o) {
                print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
                next EVENT_PROCESS;
            }
            # Capture groups follow the field order page, pfn, alloc_order,
            # fallback_order, pageblock_order, alloc_migratetype,
            # fallback_migratetype, fragmenting, change_ownership
            ($fallback_order, $fragmenting, $change_ownership) = ($4, $8, $9);
        }

        my $stats = ($perprocesspid{$process_pid} ||= {});

        # Perl Switch() sucks majorly
        if ($tracepoint eq "mm_page_alloc") {
            $stats->{MM_PAGE_ALLOC}++;
        } elsif ($tracepoint eq "mm_page_free") {
            $stats->{MM_PAGE_FREE}++;
        } elsif ($tracepoint eq "mm_page_free_batched") {
            $stats->{MM_PAGE_FREE_BATCHED}++;
        } elsif ($tracepoint eq "mm_page_pcpu_drain") {
            $stats->{MM_PAGE_PCPU_DRAIN}++;
            $stats->{STATE_PCPU_PAGES_DRAINED}++;
        } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
            $stats->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
            $stats->{STATE_PCPU_PAGES_REFILLED}++;
        } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
            $stats->{MM_PAGE_ALLOC_EXTFRAG}++;

            if ($fragmenting) {
                $stats->{HIGH_EXT_FRAG}++;
                if ($fallback_order <= 3) {
                    $stats->{HIGH_EXT_FRAGMENT_SEVERE}++;
                } else {
                    $stats->{HIGH_EXT_FRAGMENT_MODERATE}++;
                }
            }
            if ($change_ownership) {
                $stats->{HIGH_EXT_FRAGMENT_CHANGED}++;
            }
        } else {
            $stats->{EVENT_UNKNOWN}++;
        }

        # Catch a full pcpu drain event
        if ($stats->{STATE_PCPU_PAGES_DRAINED} &&
                $tracepoint ne "mm_page_pcpu_drain") {
            $stats->{HIGH_PCPU_DRAINS}++;
            $stats->{STATE_PCPU_PAGES_DRAINED} = 0;
        }

        # Catch a full pcpu refill event
        if ($stats->{STATE_PCPU_PAGES_REFILLED} &&
                $tracepoint ne "mm_page_alloc_zone_locked") {
            $stats->{HIGH_PCPU_REFILLS}++;
            $stats->{STATE_PCPU_PAGES_REFILLED} = 0;
        }
    }
    continue {
        # Runs after every line, including skipped ones, so a pending SIGINT
        # is noticed even when the input contains no parsable events
        last EVENT_PROCESS if $sigint_pending;
    }
}
# Print a table of the collected statistics to STDOUT.
#
# Argument: reference to a hash mapping a process label to a hash of
# counters (the layout of %perprocesspid / %perprocess).
#
# Any drain or refill run still in progress for an entry is folded into its
# HIGH_PCPU_DRAINS / HIGH_PCPU_REFILLS counter before printing; this updates
# the caller's data. Missing counters are printed as 0. Rows are printed in
# sorted label order.
sub dump_stats {
    my $hashref = shift;

    # Counters in column order, matching the three header rows below
    my @columns = qw(
        MM_PAGE_ALLOC
        MM_PAGE_ALLOC_ZONE_LOCKED
        MM_PAGE_FREE
        MM_PAGE_FREE_BATCHED
        MM_PAGE_PCPU_DRAIN
        HIGH_PCPU_DRAINS
        HIGH_PCPU_REFILLS
        MM_PAGE_ALLOC_EXTFRAG
        HIGH_EXT_FRAG
        HIGH_EXT_FRAGMENT_CHANGED
        HIGH_EXT_FRAGMENT_SEVERE
        HIGH_EXT_FRAGMENT_MODERATE
        EVENT_UNKNOWN
    );

    # Size the first column from the labels actually being printed
    my $max_strlen = max(0, map { length($_) } keys %$hashref) + 2;

    my $header_format = "%-" . $max_strlen . "s %8s %10s" . (" %8s" x 11) . "\n";
    my $row_format    = "%-" . $max_strlen . "s %8d %10d" . (" %8d" x 11) . "\n";

    printf("\n");
    printf($header_format,
        "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
    printf($header_format,
        "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
    printf($header_format,
        "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");

    foreach my $process_pid (sort keys %$hashref) {
        my $entry = $hashref->{$process_pid};

        # Dump final aggregates
        if ($entry->{STATE_PCPU_PAGES_DRAINED}) {
            $entry->{HIGH_PCPU_DRAINS}++;
            $entry->{STATE_PCPU_PAGES_DRAINED} = 0;
        }
        if ($entry->{STATE_PCPU_PAGES_REFILLED}) {
            $entry->{HIGH_PCPU_REFILLS}++;
            $entry->{STATE_PCPU_PAGES_REFILLED} = 0;
        }

        printf($row_format, $process_pid, map { $entry->{$_} || 0 } @columns);
    }
}
# Rebuild %perprocess by summing the counters of %perprocesspid per process
# name. The name is the label with its trailing "-<digits>" removed; an
# empty name is reported as "NO_PROCESS_NAME". The STATE_* tracking values
# are not carried over.
sub aggregate_perprocesspid() {
    my @counters = qw(
        MM_PAGE_ALLOC
        MM_PAGE_ALLOC_ZONE_LOCKED
        MM_PAGE_FREE
        MM_PAGE_FREE_BATCHED
        MM_PAGE_PCPU_DRAIN
        HIGH_PCPU_DRAINS
        HIGH_PCPU_REFILLS
        MM_PAGE_ALLOC_EXTFRAG
        HIGH_EXT_FRAG
        HIGH_EXT_FRAGMENT_CHANGED
        HIGH_EXT_FRAGMENT_SEVERE
        HIGH_EXT_FRAGMENT_MODERATE
        EVENT_UNKNOWN
    );

    undef %perprocess;

    foreach my $process_pid (keys %perprocesspid) {
        my $process = $process_pid;
        $process =~ s/-([0-9])*$//;
        if ($process eq '') {
            $process = "NO_PROCESS_NAME";
        }

        foreach my $counter (@counters) {
            $perprocess{$process}->{$counter} += $perprocesspid{$process_pid}->{$counter} || 0;
        }
    }
}
# Print the statistics table: per "name-pid" label by default, or summed per
# process name when --ignore-pid was given.
sub report() {
    if (!$opt_ignorepid) {
        dump_stats(\%perprocesspid);
    } else {
        aggregate_perprocesspid();
        dump_stats(\%perprocess);
    }
}
# Process events or signals until neither is available
#
# Each pass runs process_events, which returns at end of input or when a
# SIGINT is pending. An exit request clears the pending flag so the loop can
# finish. A report request is honoured only once 2 seconds have passed since
# the last SIGINT, leaving time for a second ctrl-c to turn it into an exit;
# until then the loop keeps processing events.
sub signal_loop() {
    my $sigint_processed;

    do {
        $sigint_processed = 0;
        process_events();

        # Handle pending signals if any
        if ($sigint_pending) {
            my $current_time = time;

            if ($sigint_exit) {
                print "Received exit signal\n";
                $sigint_pending = 0;
            }

            if ($sigint_report) {
                if ($current_time >= $sigint_received + 2) {
                    report();
                    $sigint_report = 0;
                    $sigint_pending = 0;
                    $sigint_processed = 1;
                }
            }
        }
    } while ($sigint_pending || $sigint_processed);
}
# Main: consume the trace until input ends or an exit is requested, then
# always print a final report.
signal_loop();
report();