Add a facility to the workqueue subsystem whereby a work function can
register an atomic_t such that the work function dispatcher will decrement
the atomic after the work function has returned and then call
wake_up_atomic_t() on it if it reaches 0.

This is analogous to complete_and_exit() for kernel threads and is used to
avoid a race between a work item notifying that it is about to finish and
the module .text segment containing the work function being discarded.

The way this is used is that the work function calls:

        dec_after_work(atomic_t *counter);

to register the counter; after the work function returns, process_one_work()
decrements it, potentially wakes it and clears the registration.
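
For example, a work function in a module might register the counter like
this (a minimal sketch; the foo_* names are illustrative, and each
queue_work() call is assumed to be paired with an atomic_inc() of the
counter):

        #include <linux/atomic.h>
        #include <linux/workqueue.h>

        static atomic_t foo_outstanding = ATOMIC_INIT(0);

        static void foo_work_func(struct work_struct *work)
        {
                /* Ask the dispatcher to decrement foo_outstanding after
                 * this function has returned, waking any waiter if the
                 * counter reaches 0; no module code runs after the wakeup.
                 */
                dec_after_work(&foo_outstanding);

                /* ... do the actual work ... */
        }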

The reason I've used an atomic_t rather than a completion is that (1) it
takes up less space and (2) a single counter can monitor multiple objects.
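
This means the module unload path can wait for all of its outstanding work
items with one counter, e.g. (again a sketch: wait_on_atomic_t() sleeps
until the counter reaches 0, with the action function supplied by the
caller):

        #include <linux/sched.h>
        #include <linux/wait.h>

        static int foo_wait_atomic_t(atomic_t *counter)
        {
                schedule();
                return 0;
        }

        static void foo_module_exit(void)
        {
                /* Sleep until every registered work function has returned
                 * and the dispatcher has decremented the counter to 0.
                 */
                wait_on_atomic_t(&foo_outstanding, foo_wait_atomic_t,
                                 TASK_UNINTERRUPTIBLE);
        }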

Signed-off-by: David Howells <dhowe...@redhat.com>
cc: Tejun Heo <t...@kernel.org>
cc: Lai Jiangshan <jiangshan...@gmail.com>
---

 include/linux/workqueue.h   |    1 +
 kernel/workqueue.c          |   25 +++++++++++++++++++++++++
 kernel/workqueue_internal.h |    1 +
 3 files changed, 27 insertions(+)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index db6dc9dc0482..ceaed1387e9b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -451,6 +451,7 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
 
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void drain_workqueue(struct workqueue_struct *wq);
+extern void dec_after_work(atomic_t *counter);
 
 extern int schedule_on_each_cpu(work_func_t func);
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca937b0c3a96..2936ad0ab293 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2112,6 +2112,12 @@ __acquires(&pool->lock)
                dump_stack();
        }
 
+       if (worker->dec_after) {
+               if (atomic_dec_and_test(worker->dec_after))
+                       wake_up_atomic_t(worker->dec_after);
+               worker->dec_after = NULL;
+       }
+
        /*
         * The following prevents a kworker from hogging CPU on !PREEMPT
         * kernels, where a requeueing work item waiting for something to
@@ -3087,6 +3093,25 @@ int schedule_on_each_cpu(work_func_t func)
 }
 
 /**
+ * dec_after_work - Register counter to dec and wake after work func returns
+ * @counter: The counter to decrement and wake
+ *
+ * Register an atomic counter to be decremented after a work function returns
+ * to the core.  The counter is 'woken' if it is decremented to 0.  This allows
+ * synchronisation to be effected by one or more work functions in a module
+ * without leaving a window in which the work function code can be unloaded.
+ */
+void dec_after_work(atomic_t *counter)
+{
+       struct worker *worker = current_wq_worker();
+
+       BUG_ON(!worker);
+       BUG_ON(worker->dec_after);
+       worker->dec_after = counter;
+}
+EXPORT_SYMBOL(dec_after_work);
+
+/**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn:                the function to execute
  * @ew:                guaranteed storage for the execute work structure (must
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 8635417c587b..94ea1ca9b01f 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -28,6 +28,7 @@ struct worker {
 
        struct work_struct      *current_work;  /* L: work being processed */
        work_func_t             current_func;   /* L: current_work's fn */
+       atomic_t                *dec_after;     /* Decrement after func returns */
        struct pool_workqueue   *current_pwq; /* L: current_work's pwq */
        bool                    desc_valid;     /* ->desc is valid */
        struct list_head        scheduled;      /* L: scheduled works */
