On Tue, May 01, 2018 at 05:02:19PM -0700, Song Liu wrote:
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct
> bpf_stack_build_id *id_offs,
> {
> 	int i;
> 	struct vm_area_struct *vma;
> +	bool in_nmi_ctx = in_nmi();
> +	bool irq_work_busy = false;
> +	struct stack_map_irq_work *work;
> +
> +	if (in_nmi_ctx) {
> +		work = this_cpu_ptr(&irq_work);
> +		if (work->sem)
> +			/* cannot queue more up_read, fallback */
> +			irq_work_busy = true;
> +	}
>
> 	/*
> +	 * We cannot do up_read() in nmi context. To do build_id lookup
> +	 * in nmi context, we need to run up_read() in irq_work. We use
> +	 * a percpu variable to do the irq_work. If the irq_work is
> +	 * already used by another lookup, we fall back to report ips.
> 	 *
> 	 * Same fallback is used for kernel stack (!user) on a stackmap
> 	 * with build_id.
> 	 */
> +	if (!user || !current || !current->mm || irq_work_busy ||
> 	    down_read_trylock(&current->mm->mmap_sem) == 0) {
> 		/* cannot access current->mm, fall back to ips */
> 		for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct
> bpf_stack_build_id *id_offs,
> 			- vma->vm_start;
> 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
> 	}
> +
> +	if (!in_nmi_ctx)
> +		up_read(&current->mm->mmap_sem);
> +	else {
> +		work->sem = &current->mm->mmap_sem;
> +		irq_work_queue(&work->work);
> +	}
> }
This is disgusting... :-) It's broken, though; I bet you've never actually run this with lockdep enabled, for example. Also, you set work->sem before you do the trylock: if the trylock fails, you return early and keep work->sem set, which will thereafter always result in irq_work_busy.