Hi, On 2020-08-12 16:47:13 -0700, Peter Geoghegan wrote: > On Mon, Aug 10, 2020 at 5:41 PM Andres Freund <and...@anarazel.de> wrote: > > Most of the cases where this kind of information really is interesting > > seem to benefit a lot from having stack information available. That > > obviously has overhead, so we don't want the cost all the > > time. The script at > > https://postgr.es/m/20170622210845.d2hsbqv6rxu2tiye%40alap3.anarazel.de > > can give you results like e.g. > > https://anarazel.de/t/2017-06-22/pgsemwait_64_async.svg > > It seems to have bitrot. Do you have a more recent version of the script?
Attached. Needed one python3 fix, and to be adapted so it works with futex based semaphores. Seems to work for both sysv and posix semaphores now, based a very short test. sudo python3 ./pgsemwait.py -x /home/andres/build/postgres/dev-optimize/vpath/src/backend/postgres -f 3|~/src/flamegraph/flamegraph.pl Will add a note to the other thread. Greetings, Andres Freund
#!/usr/bin/python from __future__ import print_function from bcc import BPF from time import sleep import argparse import signal def positive_int(val): try: ival = int(val) except ValueError: raise argparse.ArgumentTypeError("must be an integer") if ival < 0: raise argparse.ArgumentTypeError("must be positive") return ival def positive_nonzero_int(val): ival = positive_int(val) if ival == 0: raise argparse.ArgumentTypeError("must be nonzero") return ival bpf_text = """ #include <uapi/linux/ptrace.h> #include <linux/sched.h> struct stats_key_t { u32 pid; int user_stack_id; }; struct stats_value_t { u64 total_time; }; struct start_key_t { u32 pid; }; struct start_value_t { u64 last_start; }; // map_type, key_type, leaf_type, table_name, num_entry BPF_HASH(stats, struct stats_key_t, struct stats_value_t); BPF_HASH(start, struct start_key_t, struct start_value_t); BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); int trace_sem_entry(struct pt_regs *ctx) { u32 pid = bpf_get_current_pid_tgid(); struct start_key_t start_key = {}; struct start_value_t start_value = {}; if (!(THREAD_FILTER)) { return 0; } start_key.pid = pid; start_value.last_start = bpf_ktime_get_ns(); start.update(&start_key, &start_value); return 0; } int trace_sem_return(struct pt_regs *ctx) { u32 pid = bpf_get_current_pid_tgid(); struct stats_key_t stats_key = {}; struct start_key_t start_key = {}; struct stats_value_t zero = {}; struct stats_value_t *stats_value; struct start_value_t *start_value; u64 delta; if (!(THREAD_FILTER)) { return 0; } start_key.pid = pid; start_value = start.lookup(&start_key); if (!start_value) return 0; /* missed start */; delta = bpf_ktime_get_ns() - start_value->last_start; start.delete(&start_key); stats_key.pid = pid; stats_key.user_stack_id = stack_traces.get_stackid(ctx, BPF_F_REUSE_STACKID | BPF_F_USER_STACK); stats_value = stats.lookup_or_init(&stats_key, &zero); stats_value->total_time += delta; return 0; } """ examples = """examples: ./pgsemwait.py -x BINARY # trace postgres lwlock wait time until Ctrl-C ./pgsemwait.py -x BINARY 5 # trace for 5 seconds only ./pgsemwait.py -x BINARY -p PID # trace PID """ parser = argparse.ArgumentParser( description="Measure Postgres LWLock Wait Time", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) parser.add_argument("-x", "--binary", metavar="BINARY", dest="binary", required = True, help="path to postgres binary") parser.add_argument("-p", "--pid", metavar="PID", dest="pid", help="trace this PID only", type=positive_int) parser.add_argument("-f", "--folded", action="store_true", help="output folded format") parser.add_argument("duration", nargs="?", default=99999999, type=positive_nonzero_int, help="duration of trace, in seconds") parser.add_argument("--stack-storage-size", default=1024, type=positive_nonzero_int, help="the number of unique stack traces that can be stored and " "displayed (default 1024)") args = parser.parse_args() folded = args.folded duration = int(args.duration) # set stack storage size bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) # setup pid filter thread_filter = '1' if args.pid is not None: thread_filter = 'pid == %d' % args.pid bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) binary = args.binary b = BPF(text=bpf_text) libpath = BPF.find_exe(binary) if not libpath: bail("can't resolve library %s" % library) library = libpath b.attach_uprobe(name=library, sym_re='PGSemaphoreLock|futex', fn_name="trace_sem_entry", pid = -1) b.attach_uretprobe(name=library, sym_re='PGSemaphoreLock|futex', fn_name="trace_sem_return", pid = -1) matched = b.num_open_uprobes() if matched == 0: print("error: 0 functions traced. Exiting.", file=stderr) exit(1) # signal handler def signal_ignore(signal, frame): print() signal.signal(signal.SIGINT, signal_ignore) sleep(duration) stats = b.get_table("stats") stack_traces = b.get_table("stack_traces") for k, v in stats.items(): user_stack = [] if k.user_stack_id < 0 else \ stack_traces.walk(k.user_stack_id) name = 'postgres' if v.total_time == 0: continue if folded: # print folded stack output user_stack = list(user_stack) line = [name] + \ [b.sym(addr, k.pid).decode() for addr in reversed(user_stack)] print("%s %d" % (";".join(line), v.total_time)) else: # print default multi-line stack output for addr in user_stack: print(" %s" % b.sym(addr, k.pid)) print(" %-16s %s (%d)" % ("-", name, k.pid)) print(" %d\n" % v.total_time)