Re: [PATCH 2/8] ntb_perf: Improve thread handling to increase robustness

Jiang, Dave Mon, 13 Jun 2016 11:17:33 -0700

On Fri, 2016-06-10 at 16:54 -0600, Logan Gunthorpe wrote:
> This commit accomplishes a few things:
> 
> 1) Properly prevent multiple sets of threads from running at once
> using
> a mutex. Lots of race issues existed with the thread_cleanup.
> 
> 2) The mutex allows us to ensure that threads are finished before
> tearing down the device or module.
> 
> 3) Don't use kthread_stop when the threads can exit by themselves, as
> this is counter-indicated by the kthread_create documentation.
> Threads
> now wait for kthread_stop to occur.
> 
> 4) Writing to the run file now blocks until the threads are complete.
> The test can then be safely interrupted by a SIGINT.
> 
> Also, while I was at it:
> 
> 5) debugfs_run_write shouldn't return 0 in the early check cases as
> this
> could cause debugfs_run_write to loop undesirably.
> 
> Signed-off-by: Logan Gunthorpe <log...@deltatee.com>
Acked-by: Dave Jiang <dave.ji...@intel.com>


> ---
>  drivers/ntb/test/ntb_perf.c | 124 +++++++++++++++++++++++++++-------
> ----------
>  1 file changed, 76 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/ntb/test/ntb_perf.c
> b/drivers/ntb/test/ntb_perf.c
> index 5008ccf..db4dc61 100644
> --- a/drivers/ntb/test/ntb_perf.c
> +++ b/drivers/ntb/test/ntb_perf.c
> @@ -58,6 +58,7 @@
>  #include <linux/delay.h>
>  #include <linux/sizes.h>
>  #include <linux/ntb.h>
> +#include <linux/mutex.h>
>  
>  #define DRIVER_NAME          "ntb_perf"
>  #define DRIVER_DESCRIPTION   "PCIe NTB Performance Measurement
> Tool"
> @@ -121,6 +122,7 @@ struct pthr_ctx {
>       int                     dma_prep_err;
>       int                     src_idx;
>       void                    *srcs[MAX_SRCS];
> +     wait_queue_head_t       *wq;
>  };
>  
>  struct perf_ctx {
> @@ -134,9 +136,11 @@ struct perf_ctx {
>       struct dentry           *debugfs_run;
>       struct dentry           *debugfs_threads;
>       u8                      perf_threads;
> -     bool                    run;
> +     /* mutex ensures only one set of threads run at once */
> +     struct mutex            run_mutex;
>       struct pthr_ctx         pthr_ctx[MAX_THREADS];
>       atomic_t                tsync;
> +     atomic_t                tdone;
>  };
>  
>  enum {
> @@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx
> *pctx, char __iomem *dst, char *src,
>                       set_current_state(TASK_INTERRUPTIBLE);
>                       schedule_timeout(1);
>               }
> +
> +             if (unlikely(kthread_should_stop()))
> +                     break;
>       }
>  
>       if (use_dma) {
>               pr_info("%s: All DMA descriptors submitted\n",
> current->comm);
> -             while (atomic_read(&pctx->dma_sync) != 0)
> +             while (atomic_read(&pctx->dma_sync) != 0) {
> +                     if (kthread_should_stop())
> +                             break;
>                       msleep(20);
> +             }
>       }
>  
>       kstop = ktime_get();
> @@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data)
>               pctx->srcs[i] = NULL;
>       }
>  
> -     return 0;
> +     atomic_inc(&perf->tdone);
> +     wake_up(pctx->wq);
> +     rc = 0;
> +     goto done;
>  
>  err:
>       for (i = 0; i < MAX_SRCS; i++) {
> @@ -406,6 +419,16 @@ err:
>               pctx->dma_chan = NULL;
>       }
>  
> +done:
> +     /* Wait until we are told to stop */
> +     for (;;) {
> +             set_current_state(TASK_INTERRUPTIBLE);
> +             if (kthread_should_stop())
> +                     break;
> +             schedule();
> +     }
> +     __set_current_state(TASK_RUNNING);
> +
>       return rc;
>  }
>  
> @@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file
> *filp, char __user *ubuf,
>       struct perf_ctx *perf = filp->private_data;
>       char *buf;
>       ssize_t ret, out_offset;
> +     int running;
>  
>       if (!perf)
>               return 0;
> @@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file
> *filp, char __user *ubuf,
>       buf = kmalloc(64, GFP_KERNEL);
>       if (!buf)
>               return -ENOMEM;
> -     out_offset = snprintf(buf, 64, "%d\n", perf->run);
> +
> +     running = mutex_is_locked(&perf->run_mutex);
> +     out_offset = snprintf(buf, 64, "%d\n", running);
>       ret = simple_read_from_buffer(ubuf, count, offp, buf,
> out_offset);
>       kfree(buf);
>  
> @@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx
> *perf)
>       struct pthr_ctx *pctx;
>       int i;
>  
> -     perf->run = false;
>       for (i = 0; i < MAX_THREADS; i++) {
>               pctx = &perf->pthr_ctx[i];
>               if (pctx->thread) {
> @@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file
> *filp, const char __user *ubuf,
>  {
>       struct perf_ctx *perf = filp->private_data;
>       int node, i;
> +     DECLARE_WAIT_QUEUE_HEAD(wq);
>  
>       if (!perf->link_is_up)
> -             return 0;
> +             return -ENOLINK;
>  
>       if (perf->perf_threads == 0)
> -             return 0;
> +             return -EINVAL;
>  
> -     if (atomic_read(&perf->tsync) == 0)
> -             perf->run = false;
> +     if (!mutex_trylock(&perf->run_mutex))
> +             return -EBUSY;
>  
> -     if (perf->run)
> -             threads_cleanup(perf);
> -     else {
> -             perf->run = true;
> +     if (perf->perf_threads > MAX_THREADS) {
> +             perf->perf_threads = MAX_THREADS;
> +             pr_info("Reset total threads to: %u\n",
> MAX_THREADS);
> +     }
>  
> -             if (perf->perf_threads > MAX_THREADS) {
> -                     perf->perf_threads = MAX_THREADS;
> -                     pr_info("Reset total threads to: %u\n",
> MAX_THREADS);
> -             }
> +     /* no greater than 1M */
> +     if (seg_order > MAX_SEG_ORDER) {
> +             seg_order = MAX_SEG_ORDER;
> +             pr_info("Fix seg_order to %u\n", seg_order);
> +     }
>  
> -             /* no greater than 1M */
> -             if (seg_order > MAX_SEG_ORDER) {
> -                     seg_order = MAX_SEG_ORDER;
> -                     pr_info("Fix seg_order to %u\n", seg_order);
> -             }
> +     if (run_order < seg_order) {
> +             run_order = seg_order;
> +             pr_info("Fix run_order to %u\n", run_order);
> +     }
>  
> -             if (run_order < seg_order) {
> -                     run_order = seg_order;
> -                     pr_info("Fix run_order to %u\n", run_order);
> -             }
> +     node = dev_to_node(&perf->ntb->pdev->dev);
> +     atomic_set(&perf->tdone, 0);
>  
> -             node = dev_to_node(&perf->ntb->pdev->dev);
> -             /* launch kernel thread */
> -             for (i = 0; i < perf->perf_threads; i++) {
> -                     struct pthr_ctx *pctx;
> -
> -                     pctx = &perf->pthr_ctx[i];
> -                     atomic_set(&pctx->dma_sync, 0);
> -                     pctx->perf = perf;
> -                     pctx->thread =
> -                             kthread_create_on_node(ntb_perf_thre
> ad,
> -                                                    (void *)pctx,
> -                                                    node,
> "ntb_perf %d", i);
> -                     if (IS_ERR(pctx->thread)) {
> -                             pctx->thread = NULL;
> -                             goto err;
> -                     } else
> -                             wake_up_process(pctx->thread);
> -
> -                     if (perf->run == false)
> -                             return -ENXIO;
> -             }
> +     /* launch kernel thread */
> +     for (i = 0; i < perf->perf_threads; i++) {
> +             struct pthr_ctx *pctx;
>  
> +             pctx = &perf->pthr_ctx[i];
> +             atomic_set(&pctx->dma_sync, 0);
> +             pctx->perf = perf;
> +             pctx->wq = &wq;
> +             pctx->thread =
> +                     kthread_create_on_node(ntb_perf_thread,
> +                                            (void *)pctx,
> +                                            node, "ntb_perf %d",
> i);
> +             if (IS_ERR(pctx->thread)) {
> +                     pctx->thread = NULL;
> +                     goto err;
> +             } else {
> +                     wake_up_process(pctx->thread);
> +             }
>       }
>  
> +     wait_event_interruptible(wq,
> +             atomic_read(&perf->tdone) == perf->perf_threads);
> +
> +     threads_cleanup(perf);
> +     mutex_unlock(&perf->run_mutex);
>       return count;
>  
>  err:
>       threads_cleanup(perf);
> +     mutex_unlock(&perf->run_mutex);
>       return -ENXIO;
>  }
>  
> @@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client,
> struct ntb_dev *ntb)
>       perf->ntb = ntb;
>       perf->perf_threads = 1;
>       atomic_set(&perf->tsync, 0);
> -     perf->run = false;
> +     mutex_init(&perf->run_mutex);
>       spin_lock_init(&perf->db_lock);
>       perf_setup_mw(ntb, perf);
>       INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
> @@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client
> *client, struct ntb_dev *ntb)
>  
>       dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
>  
> +     mutex_lock(&perf->run_mutex);
> +
>       cancel_delayed_work_sync(&perf->link_work);
>       cancel_work_sync(&perf->link_cleanup);
>  
> -- 
> 2.1.4
>

Re: [PATCH 2/8] ntb_perf: Improve thread handling to increase robustness

Reply via email to