On Wed, 3 Mar 2021 09:26:04 -0500
Steven Rostedt <rost...@goodmis.org> wrote:

> On Wed, 3 Mar 2021 13:48:28 +0900
> Masami Hiramatsu <mhira...@kernel.org> wrote:
> 
> > 
> > > 
> > > I think (can't prove) this used to work:  
> 
> Would be good to find out if it did.
> 
> > 
> > I'm not sure the bpftrace had correctly handled it or not.
> > 
> > > 
> > >     # bpftrace -e 'kretprobe:__tcp_retransmit_skb { @[kstack()] = count() 
> > > }'
> > >     Attaching 1 probe...
> > >     ^C
> > > 
> > >     @[
> > >         kretprobe_trampoline+0
> > >     ]: 1  
> > 
> > Would you know how the bpftrace stacktracer rewinds the stack entries?
> > FYI, ftrace does it in trace_seq_print_sym()@kernel/trace/trace_output.c
> > 
> 
> The difference between trace events and normal function tracing stack
> traces is that it keeps its original return address. But kretprobes (and
> function graph tracing, and some bpf trampolines too) modify the return
> pointer, and that could possibly cause havoc with the stack trace.

BTW, I think if the stack tracer passes the nth of kretprobe_trampoline
or a cursor, kretprobe can find the correct return address from given task.

I've made a patch to do that only for the CONFIG_ARCH_STACKWALK=y

Here is an example on x86. 

 # echo r vfs_read > kprobe_events 
 # echo stacktrace > events/kprobes/r_vfs_read_0/trigger 
 # echo 1 > events/kprobes/r_vfs_read_0/enable 
 # echo 1 > options/sym-offset 
 # less trace 
...

              sh-132     [007] ...1    22.524917: <stack trace>
 => kretprobe_dispatcher+0x7d/0xc0
 => __kretprobe_trampoline_handler+0xdb/0x1b0
 => trampoline_handler+0x48/0x60
 => kretprobe_trampoline+0x2a/0x50
 => ksys_read+0x70/0xf0
 => __x64_sys_read+0x1a/0x20
 => do_syscall_64+0x38/0x50
 => entry_SYSCALL_64_after_hwframe+0x44/0xae
 => 0
 => 0

------

>From 77a785a3a0791171b570830d0b2f099f8a4ba337 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhira...@kernel.org>
Date: Thu, 4 Mar 2021 14:19:24 +0900
Subject: [PATCH] kprobes: stacktrace: Recover the address changed by kretprobe

Recover the return address on the stack which changed by the
kretprobe.

Signed-off-by: Masami Hiramatsu <mhira...@kernel.org>
---
 include/linux/kprobes.h |  3 ++
 kernel/kprobes.c        | 81 +++++++++++++++++++++++++++--------------
 kernel/stacktrace.c     | 26 +++++++++++++
 3 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..a022e507d829 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,6 +205,9 @@ extern void arch_prepare_kretprobe(struct 
kretprobe_instance *ri,
                                   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+                                      unsigned long addr,
+                                      struct llist_node **cur);
 /* If the trampoline handler called from a kprobe, use this version */
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
                                void *trampoline_address,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..b3d9dbd6086f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1863,46 +1863,56 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 
 #ifdef CONFIG_KRETPROBES
 
-unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
-                                            void *trampoline_address,
-                                            void *frame_pointer)
+/* This assumes the tsk is current or the task which is not running. */
+unsigned long kretprobe_real_stack_tsk(struct task_struct *tsk,
+                                      unsigned long addr,
+                                      struct llist_node **cur)
 {
-       kprobe_opcode_t *correct_ret_addr = NULL;
        struct kretprobe_instance *ri = NULL;
-       struct llist_node *first, *node;
-       struct kretprobe *rp;
+       struct llist_node *node = *cur;
 
-       /* Find all nodes for this frame. */
-       first = node = current->kretprobe_instances.first;
-       while (node) {
-               ri = container_of(node, struct kretprobe_instance, llist);
+       if (addr != (unsigned long)&kretprobe_trampoline)
+               return addr;
 
-               BUG_ON(ri->fp != frame_pointer);
+       if (!node)
+               node = tsk->kretprobe_instances.first;
+       else
+               node = node->next;
 
-               if (ri->ret_addr != trampoline_address) {
-                       correct_ret_addr = ri->ret_addr;
-                       /*
-                        * This is the real return address. Any other
-                        * instances associated with this task are for
-                        * other calls deeper on the call stack
-                        */
-                       goto found;
+       while (node) {
+               ri = container_of(node, struct kretprobe_instance, llist);
+               if (ri->ret_addr != (void *)&kretprobe_trampoline) {
+                       *cur = node;
+                       return (unsigned long)ri->ret_addr;
                }
-
                node = node->next;
        }
-       pr_err("Oops! Kretprobe fails to find correct return address.\n");
-       BUG_ON(1);
+       return 0;
+}
 
-found:
-       /* Unlink all nodes for this frame. */
-       current->kretprobe_instances.first = node->next;
-       node->next = NULL;
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+                                            void *trampoline_address,
+                                            void *frame_pointer)
+{
+       kprobe_opcode_t *correct_ret_addr = NULL;
+       struct kretprobe_instance *ri = NULL;
+       struct llist_node *first, *node = NULL;
+       struct kretprobe *rp;
 
-       /* Run them..  */
+       /* Find correct address and all nodes for this frame. */
+       correct_ret_addr = (void*)kretprobe_real_stack_tsk(current,
+                               (unsigned long)&kretprobe_trampoline, &node);
+       if (!correct_ret_addr) {
+               pr_err("Oops! Kretprobe fails to find correct return 
address.\n");
+               BUG_ON(1);
+       }
+
+       /* Run them. */
+       first = current->kretprobe_instances.first;
        while (first) {
                ri = container_of(first, struct kretprobe_instance, llist);
-               first = first->next;
+
+               BUG_ON(ri->fp != frame_pointer);
 
                rp = get_kretprobe(ri);
                if (rp && rp->handler) {
@@ -1913,6 +1923,21 @@ unsigned long __kretprobe_trampoline_handler(struct 
pt_regs *regs,
                        rp->handler(ri, regs);
                        __this_cpu_write(current_kprobe, prev);
                }
+               if (first == node)
+                       break;
+
+               first = first->next;
+       }
+
+       /* Unlink all nodes for this frame. */
+       first = current->kretprobe_instances.first;
+       current->kretprobe_instances.first = node->next;
+       node->next = NULL;
+
+       /* Recycle them.  */
+       while (first) {
+               ri = container_of(first, struct kretprobe_instance, llist);
+               first = first->next;
 
                recycle_rp_inst(ri);
        }
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..416f357e64b8 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
+#include <linux/kprobes.h>
 
 /**
  * stack_trace_print - Print the entries in the stack trace
@@ -76,6 +77,10 @@ struct stacktrace_cookie {
        unsigned int    size;
        unsigned int    skip;
        unsigned int    len;
+#ifdef CONFIG_KRETPROBES
+       struct llist_node *cur;
+       struct task_struct *tsk;
+#endif
 };
 
 static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
@@ -89,6 +94,7 @@ static bool stack_trace_consume_entry(void *cookie, unsigned 
long addr)
                c->skip--;
                return true;
        }
+       addr = kretprobe_real_stack_tsk(c->tsk, addr, &c->cur);
        c->store[c->len++] = addr;
        return c->len < c->size;
 }
@@ -116,6 +122,10 @@ unsigned int stack_trace_save(unsigned long *store, 
unsigned int size,
                .store  = store,
                .size   = size,
                .skip   = skipnr + 1,
+#ifdef CONFIG_KRETPROBES
+               .cur    = NULL,
+               .tsk    = current,
+#endif
        };
 
        arch_stack_walk(consume_entry, &c, current, NULL);
@@ -141,6 +151,10 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, 
unsigned long *store,
                .size   = size,
                /* skip this function if they are tracing us */
                .skip   = skipnr + (current == tsk),
+#ifdef CONFIG_KRETPROBES
+               .cur    = NULL,
+               .tsk    = tsk,
+#endif
        };
 
        if (!try_get_task_stack(tsk))
@@ -168,6 +182,10 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, 
unsigned long *store,
                .store  = store,
                .size   = size,
                .skip   = skipnr,
+#ifdef CONFIG_KRETPROBES
+               .cur    = NULL,
+               .tsk    = current,
+#endif
        };
 
        arch_stack_walk(consume_entry, &c, current, regs);
@@ -194,6 +212,10 @@ int stack_trace_save_tsk_reliable(struct task_struct *tsk, 
unsigned long *store,
        struct stacktrace_cookie c = {
                .store  = store,
                .size   = size,
+#ifdef CONFIG_KRETPROBES
+               .cur    = NULL,
+               .tsk    = tsk,
+#endif
        };
        int ret;
 
@@ -224,6 +246,10 @@ unsigned int stack_trace_save_user(unsigned long *store, 
unsigned int size)
        struct stacktrace_cookie c = {
                .store  = store,
                .size   = size,
+#ifdef CONFIG_KRETPROBES
+               .cur    = NULL,
+               .tsk    = current,
+#endif
        };
        mm_segment_t fs;
 
-- 
2.25.1




Thank you,

-- 
Masami Hiramatsu <mhira...@kernel.org>

Reply via email to