On Tue, May 01, 2018 at 05:02:19PM -0700, Song Liu wrote:
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct 
> bpf_stack_build_id *id_offs,
>  {
>       int i;
>       struct vm_area_struct *vma;
> +     bool in_nmi_ctx = in_nmi();
> +     bool irq_work_busy = false;
> +     struct stack_map_irq_work *work;
> +
> +     if (in_nmi_ctx) {
> +             work = this_cpu_ptr(&irq_work);
> +             if (work->sem)
> +                     /* cannot queue more up_read, fallback */
> +                     irq_work_busy = true;
> +     }
>  
>       /*
> +      * We cannot do up_read() in nmi context. To do build_id lookup
> +      * in nmi context, we need to run up_read() in irq_work. We use
> +      * a percpu variable to do the irq_work. If the irq_work is
> +      * already used by another lookup, we fall back to report ips.
>        *
>        * Same fallback is used for kernel stack (!user) on a stackmap
>        * with build_id.
>        */
> +     if (!user || !current || !current->mm || irq_work_busy ||
>           down_read_trylock(&current->mm->mmap_sem) == 0) {
>               /* cannot access current->mm, fall back to ips */
>               for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct 
> bpf_stack_build_id *id_offs,
>                       - vma->vm_start;
>               id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>       }
> +
> +     if (!in_nmi_ctx)
> +             up_read(&current->mm->mmap_sem);
> +     else {
> +             work->sem = &current->mm->mmap_sem;
> +             irq_work_queue(&work->work);
> +     }
>  }

This is disgusting.. :-)

It's broken though; I bet you've never actually run this with lockdep
enabled, for example.

Also, you set work->sem before you do the trylock; if the trylock fails you
return early and leave work->sem set, which will thereafter always result
in irq_work_busy.


Reply via email to