Hi,

On 2020-08-12 16:47:13 -0700, Peter Geoghegan wrote:
> On Mon, Aug 10, 2020 at 5:41 PM Andres Freund <and...@anarazel.de> wrote:
> > Most of the cases where this kind of information really is interesting
> > seem to benefit a lot from having stack information available.  That
> > obviously has overhead, so we don't want the cost all the
> > time.  The script at
> > https://postgr.es/m/20170622210845.d2hsbqv6rxu2tiye%40alap3.anarazel.de
> > can give you results like e.g.
> > https://anarazel.de/t/2017-06-22/pgsemwait_64_async.svg
> 
> It seems to have bitrot. Do you have a more recent version of the script?

Attached. Needed one python3 fix, and to be adapted so it works with
futex based semaphores. Seems to work for both sysv and posix semaphores
now, based a very short test.

sudo python3 ./pgsemwait.py -x 
/home/andres/build/postgres/dev-optimize/vpath/src/backend/postgres -f 
3|~/src/flamegraph/flamegraph.pl

Will add a note to the other thread.

Greetings,

Andres Freund
#!/usr/bin/python

from __future__ import print_function
from bcc import BPF
from time import sleep
import argparse
import signal

def positive_int(val):
    try:
        ival = int(val)
    except ValueError:
        raise argparse.ArgumentTypeError("must be an integer")

    if ival < 0:
        raise argparse.ArgumentTypeError("must be positive")
    return ival

def positive_nonzero_int(val):
    ival = positive_int(val)
    if ival == 0:
        raise argparse.ArgumentTypeError("must be nonzero")
    return ival

bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

struct stats_key_t {
    u32 pid;
    int user_stack_id;
};

struct stats_value_t {
    u64 total_time;
};

struct start_key_t {
    u32 pid;
};

struct start_value_t {
    u64 last_start;
};

// map_type, key_type, leaf_type, table_name, num_entry
BPF_HASH(stats, struct stats_key_t, struct stats_value_t);
BPF_HASH(start, struct start_key_t, struct start_value_t);

BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);

int trace_sem_entry(struct pt_regs *ctx)
{
    u32 pid = bpf_get_current_pid_tgid();
    struct start_key_t start_key = {};
    struct start_value_t start_value = {};

    if (!(THREAD_FILTER)) {
        return 0;
    }

    start_key.pid = pid;
    start_value.last_start = bpf_ktime_get_ns();

    start.update(&start_key, &start_value);

    return 0;
}

int trace_sem_return(struct pt_regs *ctx)
{
    u32 pid = bpf_get_current_pid_tgid();
    struct stats_key_t stats_key = {};
    struct start_key_t start_key = {};
    struct stats_value_t zero = {};
    struct stats_value_t *stats_value;
    struct start_value_t *start_value;
    u64 delta;

    if (!(THREAD_FILTER)) {
        return 0;
    }

    start_key.pid = pid;
    start_value = start.lookup(&start_key);

    if (!start_value)
        return 0; /* missed start */;

    delta = bpf_ktime_get_ns() - start_value->last_start;
    start.delete(&start_key);

    stats_key.pid = pid;
    stats_key.user_stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK);

    stats_value = stats.lookup_or_init(&stats_key, &zero);
    stats_value->total_time += delta;

    return 0;
}

"""

examples = """examples:
    ./pgsemwait.py -x BINARY        # trace postgres lwlock wait time until Ctrl-C
    ./pgsemwait.py -x BINARY 5      # trace for 5 seconds only
    ./pgsemwait.py -x BINARY -p PID # trace PID
"""

parser = argparse.ArgumentParser(
    description="Measure Postgres LWLock Wait Time",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-x", "--binary", metavar="BINARY", dest="binary", required = True,
    help="path to postgres binary")
parser.add_argument("-p", "--pid", metavar="PID", dest="pid",
    help="trace this PID only", type=positive_int)
parser.add_argument("-f", "--folded", action="store_true",
    help="output folded format")
parser.add_argument("duration", nargs="?", default=99999999,
    type=positive_nonzero_int,
    help="duration of trace, in seconds")
parser.add_argument("--stack-storage-size", default=1024,
    type=positive_nonzero_int,
    help="the number of unique stack traces that can be stored and "
         "displayed (default 1024)")

args = parser.parse_args()

folded = args.folded
duration = int(args.duration)

# set stack storage size
bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))

# setup pid filter
thread_filter = '1'
if args.pid is not None:
    thread_filter = 'pid == %d' % args.pid
bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter)

binary = args.binary

b = BPF(text=bpf_text)

libpath = BPF.find_exe(binary)
if not libpath:
    bail("can't resolve library %s" % library)
library = libpath

b.attach_uprobe(name=library, sym_re='PGSemaphoreLock|futex',
    fn_name="trace_sem_entry",
    pid = -1)

b.attach_uretprobe(name=library, sym_re='PGSemaphoreLock|futex',
    fn_name="trace_sem_return",
    pid = -1)

matched = b.num_open_uprobes()
if matched == 0:
    print("error: 0 functions traced. Exiting.", file=stderr)
    exit(1)

# signal handler
def signal_ignore(signal, frame):
    print()

signal.signal(signal.SIGINT, signal_ignore)

sleep(duration)

stats = b.get_table("stats")
stack_traces = b.get_table("stack_traces")

for k, v in stats.items():
    user_stack = [] if k.user_stack_id < 0 else \
        stack_traces.walk(k.user_stack_id)
    name = 'postgres'
    if v.total_time == 0:
        continue
    if folded:
        # print folded stack output
        user_stack = list(user_stack)
        line = [name] + \
            [b.sym(addr, k.pid).decode() for addr in reversed(user_stack)]
        print("%s %d" % (";".join(line), v.total_time))
    else:
        # print default multi-line stack output
        for addr in user_stack:
            print("    %s" % b.sym(addr, k.pid))
        print("    %-16s %s (%d)" % ("-", name, k.pid))
        print("        %d\n" % v.total_time)

Reply via email to