The branch main has been updated by manu:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=ec25b6fa5ffde89f202c2caa77268ed1eed12158

commit ec25b6fa5ffde89f202c2caa77268ed1eed12158
Author:     Vladimir Kondratyev <w...@freebsd.org>
AuthorDate: 2021-01-17 11:21:49 +0000
Commit:     Emmanuel Vadot <m...@freebsd.org>
CommitDate: 2021-01-17 11:47:28 +0000

    LinuxKPI: Reimplement irq_work queue on top of fast taskqueue
    
    Summary:
    Linux's irq_work queue was created for asynchronous execution of code from 
contexts where spin_locks are not available, such as "hardware interrupt 
context". FreeBSD's fast taskqueues were created for the same purpose.
    
    Drm-kmod 5.4 uses irq_work_queue() in at least one place to schedule 
execution of a task/work item from a critical section, which triggers the 
following INVARIANTS-induced panic:
    
    ```
    panic: acquiring blockable sleep lock with spinlock or critical section 
held (sleep mutex) linuxkpi_short_wq @ /usr/src/sys/kern/subr_taskqueue.c:281
    cpuid = 6
    time = 1605048416
    KDB: stack backtrace:
    db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 
0xfffffe006b538c90
    vpanic() at vpanic+0x182/frame 0xfffffe006b538ce0
    panic() at panic+0x43/frame 0xfffffe006b538d40
    witness_checkorder() at witness_checkorder+0xf3e/frame 0xfffffe006b538f00
    __mtx_lock_flags() at __mtx_lock_flags+0x94/frame 0xfffffe006b538f50
    taskqueue_enqueue() at taskqueue_enqueue+0x42/frame 0xfffffe006b538f70
    linux_queue_work_on() at linux_queue_work_on+0xe9/frame 0xfffffe006b538fb0
    irq_work_queue() at irq_work_queue+0x21/frame 0xfffffe006b538fd0
    semaphore_notify() at semaphore_notify+0xb2/frame 0xfffffe006b539020
    __i915_sw_fence_notify() at __i915_sw_fence_notify+0x2e/frame 
0xfffffe006b539050
    __i915_sw_fence_complete() at __i915_sw_fence_complete+0x63/frame 
0xfffffe006b539080
    i915_sw_fence_complete() at i915_sw_fence_complete+0x8e/frame 
0xfffffe006b5390c0
    dma_i915_sw_fence_wake() at dma_i915_sw_fence_wake+0x4f/frame 
0xfffffe006b539100
    dma_fence_signal_locked() at dma_fence_signal_locked+0x105/frame 
0xfffffe006b539180
    dma_fence_signal() at dma_fence_signal+0x72/frame 0xfffffe006b5391c0
    dma_fence_is_signaled() at dma_fence_is_signaled+0x80/frame 
0xfffffe006b539200
    dma_resv_add_shared_fence() at dma_resv_add_shared_fence+0xb3/frame 
0xfffffe006b539270
    i915_vma_move_to_active() at i915_vma_move_to_active+0x18a/frame 
0xfffffe006b5392b0
    eb_move_to_gpu() at eb_move_to_gpu+0x3ad/frame 0xfffffe006b539320
    eb_submit() at eb_submit+0x15/frame 0xfffffe006b539350
    i915_gem_do_execbuffer() at i915_gem_do_execbuffer+0x7d4/frame 
0xfffffe006b539570
    i915_gem_execbuffer2_ioctl() at i915_gem_execbuffer2_ioctl+0x1c1/frame 
0xfffffe006b539600
    drm_ioctl_kernel() at drm_ioctl_kernel+0xd9/frame 0xfffffe006b539670
    drm_ioctl() at drm_ioctl+0x5cd/frame 0xfffffe006b539820
    linux_file_ioctl() at linux_file_ioctl+0x323/frame 0xfffffe006b539880
    kern_ioctl() at kern_ioctl+0x1f4/frame 0xfffffe006b5398f0
    sys_ioctl() at sys_ioctl+0x12a/frame 0xfffffe006b5399c0
    amd64_syscall() at amd64_syscall+0x121/frame 0xfffffe006b539af0
    fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe006b539af0
    --- syscall (54, FreeBSD ELF64, sys_ioctl), rip = 0x800a6f09a, rsp = 
0x7fffffffe588, rbp = 0x7fffffffe640 ---
    KDB: enter: panic
    ```
    Here, dma_resv_add_shared_fence() performs a critical_enter(), and the 
following call to schedule_work() from semaphore_notify() triggers the 
'acquiring blockable sleep lock with spinlock or critical section held' panic.
    
    Switching irq_work implementation to fast taskqueue fixes the panic for me.
    
    Another report of a similar bug: 
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247166
    
    Reviewed By: hselasky
    Differential Revision: https://reviews.freebsd.org/D27171
---
 .../linuxkpi/common/include/linux/irq_work.h       |  25 ++++-
 sys/compat/linuxkpi/common/include/linux/llist.h   | 101 +++++++++++++++++++++
 sys/compat/linuxkpi/common/include/linux/slab.h    |  16 ++++
 sys/compat/linuxkpi/common/src/linux_slab.c        |  27 ++++++
 sys/compat/linuxkpi/common/src/linux_work.c        |  48 ++++++++++
 5 files changed, 212 insertions(+), 5 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/irq_work.h 
b/sys/compat/linuxkpi/common/include/linux/irq_work.h
index b44e78230b0d..eb1798a4e450 100644
--- a/sys/compat/linuxkpi/common/include/linux/irq_work.h
+++ b/sys/compat/linuxkpi/common/include/linux/irq_work.h
@@ -31,22 +31,37 @@
 #ifndef __LINUX_IRQ_WORK_H__
 #define        __LINUX_IRQ_WORK_H__
 
-#include <linux/workqueue.h>
+#include <sys/param.h>
+#include <sys/taskqueue.h>
+
+struct irq_work;
+typedef void (*irq_work_func_t)(struct irq_work *);
 
 struct irq_work {
-       struct work_struct work;
+       struct task irq_task;
+       irq_work_func_t func;
 };
 
+extern struct taskqueue *linux_irq_work_tq;
+
+#define        DEFINE_IRQ_WORK(name, _func)    struct irq_work name = {        
\
+       .irq_task = TASK_INITIALIZER(0, linux_irq_work_fn, &(name)),    \
+       .func  = (_func),                                               \
+}
+
+void   linux_irq_work_fn(void *, int);
+
 static inline void
-init_irq_work(struct irq_work *irqw, void (*func)(struct irq_work *))
+init_irq_work(struct irq_work *irqw, irq_work_func_t func)
 {
-       INIT_WORK(&irqw->work, (work_func_t)func);
+       TASK_INIT(&irqw->irq_task, 0, linux_irq_work_fn, irqw);
+       irqw->func = func;
 }
 
 static inline void
 irq_work_queue(struct irq_work *irqw)
 {
-       schedule_work(&irqw->work);
+       taskqueue_enqueue(linux_irq_work_tq, &irqw->irq_task);
 }
 
 #endif /* __LINUX_IRQ_WORK_H__ */
diff --git a/sys/compat/linuxkpi/common/include/linux/llist.h 
b/sys/compat/linuxkpi/common/include/linux/llist.h
new file mode 100644
index 000000000000..b3c89516e710
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/llist.h
@@ -0,0 +1,101 @@
+/* Public domain. */
+
+#ifndef _LINUX_LLIST_H
+#define _LINUX_LLIST_H
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+struct llist_node {
+       struct llist_node *next;
+};
+
+struct llist_head {
+       struct llist_node *first;
+};
+
+#define        LLIST_HEAD_INIT(name)   { NULL }
+#define        LLIST_HEAD(name)        struct llist_head name = 
LLIST_HEAD_INIT(name)
+
+#define llist_entry(ptr, type, member) \
+       ((ptr) ? container_of(ptr, type, member) : NULL)
+
+static inline struct llist_node *
+llist_del_all(struct llist_head *head)
+{
+       return ((void *)atomic_readandclear_ptr((uintptr_t *)&head->first));
+}
+
+static inline struct llist_node *
+llist_del_first(struct llist_head *head)
+{
+       struct llist_node *first, *next;
+
+       do {
+               first = head->first;
+               if (first == NULL)
+                       return NULL;
+               next = first->next;
+       } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+           (uintptr_t)first, (uintptr_t)next) == 0);
+
+       return (first);
+}
+
+static inline bool
+llist_add(struct llist_node *new, struct llist_head *head)
+{
+       struct llist_node *first;
+
+       do {
+               new->next = first = head->first;
+       } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+           (uintptr_t)first, (uintptr_t)new) == 0);
+
+       return (first == NULL);
+}
+
+static inline bool
+llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+    struct llist_head *head)
+{
+       struct llist_node *first;
+
+       do {
+               new_last->next = first = head->first;
+       } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+           (uintptr_t)first, (uintptr_t)new_first) == 0);
+
+       return (first == NULL);
+}
+
+static inline void
+init_llist_head(struct llist_head *head)
+{
+       head->first = NULL;
+}
+
+static inline bool
+llist_empty(struct llist_head *head)
+{
+       return (head->first == NULL);
+}
+
+#define llist_for_each_safe(pos, n, node)                              \
+       for ((pos) = (node);                                            \
+           (pos) != NULL &&                                            \
+           ((n) = (pos)->next, pos);                                   \
+           (pos) = (n))
+
+#define llist_for_each_entry_safe(pos, n, node, member)                \
+       for (pos = llist_entry((node), __typeof(*pos), member);         \
+           pos != NULL &&                                              \
+           (n = llist_entry(pos->member.next, __typeof(*pos), member), pos); \
+           pos = n)
+
+#define llist_for_each_entry(pos, node, member)                                
\
+       for ((pos) = llist_entry((node), __typeof(*(pos)), member);     \
+           (pos) != NULL;                                              \
+           (pos) = llist_entry((pos)->member.next, __typeof(*(pos)), member))
+
+#endif
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h 
b/sys/compat/linuxkpi/common/include/linux/slab.h
index ae1c9d81843e..0cd748b7ecb9 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -35,10 +35,12 @@
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/limits.h>
+#include <sys/proc.h>
 #include <vm/uma.h>
 
 #include <linux/types.h>
 #include <linux/gfp.h>
+#include <linux/llist.h>
 
 MALLOC_DECLARE(M_KMALLOC);
 
@@ -90,6 +92,19 @@ struct linux_kmem_cache {
 #define        ARCH_KMALLOC_MINALIGN \
        __alignof(unsigned long long)
 
+/*
+ * Critical section-friendly version of kfree().
+ * Requires knowledge of the allocation size at build time.
+ */
+#define kfree_async(ptr)       do {                                    \
+       _Static_assert(sizeof(*(ptr)) >= sizeof(struct llist_node),     \
+           "Size of object to free is unknown or too small");          \
+       if (curthread->td_critnest != 0)                                \
+               linux_kfree_async(ptr);                                 \
+       else                                                            \
+               kfree(ptr);                                             \
+} while (0)
+
 static inline gfp_t
 linux_check_m_flags(gfp_t flags)
 {
@@ -189,5 +204,6 @@ linux_kmem_cache_free(struct linux_kmem_cache *c, void *m)
 }
 
 extern void linux_kmem_cache_destroy(struct linux_kmem_cache *);
+void linux_kfree_async(void *);
 
 #endif                                 /* _LINUX_SLAB_H_ */
diff --git a/sys/compat/linuxkpi/common/src/linux_slab.c 
b/sys/compat/linuxkpi/common/src/linux_slab.c
index c8deab01731a..3304c34b1dee 100644
--- a/sys/compat/linuxkpi/common/src/linux_slab.c
+++ b/sys/compat/linuxkpi/common/src/linux_slab.c
@@ -30,6 +30,11 @@ __FBSDID("$FreeBSD$");
 #include <linux/slab.h>
 #include <linux/rcupdate.h>
 #include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+
+#include <sys/param.h>
+#include <sys/taskqueue.h>
 
 struct linux_kmem_rcu {
        struct rcu_head rcu_head;
@@ -44,6 +49,8 @@ struct linux_kmem_rcu {
        ((void *)((char *)(r) + sizeof(struct linux_kmem_rcu) - \
        (r)->cache->cache_size))
 
+static LLIST_HEAD(linux_kfree_async_list);
+
 static int
 linux_kmem_ctor(void *mem, int size, void *arg, int flags)
 {
@@ -126,3 +133,23 @@ linux_kmem_cache_destroy(struct linux_kmem_cache *c)
        uma_zdestroy(c->cache_zone);
        free(c, M_KMALLOC);
 }
+
+static void
+linux_kfree_async_fn(void *context, int pending)
+{
+       struct llist_node *freed;
+
+       while((freed = llist_del_first(&linux_kfree_async_list)) != NULL)
+               kfree(freed);
+}
+static struct task linux_kfree_async_task =
+    TASK_INITIALIZER(0, linux_kfree_async_fn, &linux_kfree_async_task);
+
+void
+linux_kfree_async(void *addr)
+{
+       if (addr == NULL)
+               return;
+       llist_add(addr, &linux_kfree_async_list);
+       taskqueue_enqueue(linux_irq_work_tq, &linux_kfree_async_task);
+}
diff --git a/sys/compat/linuxkpi/common/src/linux_work.c 
b/sys/compat/linuxkpi/common/src/linux_work.c
index 043c5b7d1aff..45025378baa9 100644
--- a/sys/compat/linuxkpi/common/src/linux_work.c
+++ b/sys/compat/linuxkpi/common/src/linux_work.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <linux/compat.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/irq_work.h>
 
 #include <sys/kernel.h>
 
@@ -59,6 +60,8 @@ struct workqueue_struct *system_unbound_wq;
 struct workqueue_struct *system_highpri_wq;
 struct workqueue_struct *system_power_efficient_wq;
 
+struct taskqueue *linux_irq_work_tq;
+
 static int linux_default_wq_cpus = 4;
 
 static void linux_delayed_work_timer_fn(void *);
@@ -683,3 +686,48 @@ linux_work_uninit(void *arg)
        system_highpri_wq = NULL;
 }
 SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, 
NULL);
+
+void
+linux_irq_work_fn(void *context, int pending)
+{
+       struct irq_work *irqw = context;
+
+       irqw->func(irqw);
+}
+
+static void
+linux_irq_work_init_fn(void *context, int pending)
+{
+       /*
+        * LinuxKPI performs lazy allocation of memory structures required by
+        * current on the first access to it.  As some irq_work clients read
+        * it with spinlock taken, we have to preallocate td_lkpi_task before
+        * first call to irq_work_queue().  As irq_work uses a single thread,
+        * it is enough to read current once at SYSINIT stage.
+        */
+       if (current == NULL)
+               panic("irq_work taskqueue is not initialized");
+}
+static struct task linux_irq_work_init_task =
+    TASK_INITIALIZER(0, linux_irq_work_init_fn, &linux_irq_work_init_task);
+
+static void
+linux_irq_work_init(void *arg)
+{
+       linux_irq_work_tq = taskqueue_create_fast("linuxkpi_irq_wq",
+           M_WAITOK, taskqueue_thread_enqueue, &linux_irq_work_tq);
+       taskqueue_start_threads(&linux_irq_work_tq, 1, PWAIT,
+           "linuxkpi_irq_wq");
+       taskqueue_enqueue(linux_irq_work_tq, &linux_irq_work_init_task);
+}
+SYSINIT(linux_irq_work_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
+    linux_irq_work_init, NULL);
+
+static void
+linux_irq_work_uninit(void *arg)
+{
+       taskqueue_drain_all(linux_irq_work_tq);
+       taskqueue_free(linux_irq_work_tq);
+}
+SYSUNINIT(linux_irq_work_uninit, SI_SUB_TASKQ, SI_ORDER_SECOND,
+    linux_irq_work_uninit, NULL);
_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to