On Mon, Apr 10, 2023 at 03:35:09PM -0700, Ashutosh Dixit wrote:
> Instead of erroring out when GuC reset is in progress, block waiting for
> GuC reset to complete which is a more reasonable uapi behavior.
> 
> v2: Avoid race between wake_up_all and waiting for wakeup (Rodrigo)
> 
> Signed-off-by: Ashutosh Dixit <ashutosh.di...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_hwmon.c | 38 +++++++++++++++++++++++++++----
>  1 file changed, 33 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
> b/drivers/gpu/drm/i915/i915_hwmon.c
> index 9ab8971679fe3..8471a667dfc71 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -51,6 +51,7 @@ struct hwm_drvdata {
>       char name[12];
>       int gt_n;
>       bool reset_in_progress;
> +     wait_queue_head_t waitq;
>  };
>  
>  struct i915_hwmon {
> @@ -395,16 +396,41 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
>  static int
>  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
>  {
> +#define GUC_RESET_TIMEOUT msecs_to_jiffies(2000)
> +
> +     int ret = 0, timeout = GUC_RESET_TIMEOUT;
>       struct i915_hwmon *hwmon = ddat->hwmon;
>       intel_wakeref_t wakeref;
> -     int ret = 0;
> +     DEFINE_WAIT(wait);
>       u32 nval;
>  
> -     mutex_lock(&hwmon->hwmon_lock);
> -     if (hwmon->ddat.reset_in_progress) {
> -             ret = -EAGAIN;
> -             goto unlock;
> +     /* Block waiting for GuC reset to complete when needed */
> +     for (;;) {
> +             mutex_lock(&hwmon->hwmon_lock);

I'm really worried about how this mutex is handled together with the wait queue.
At first glance, it looks like this is trying to reimplement ww_mutex?

All the other examples of wait_queue usage like this that I looked at either
didn't use locks or had them in a totally different flow that I could not
correlate with this one.

> +
> +             prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
> +
> +             if (!hwmon->ddat.reset_in_progress)
> +                     break;

If this breaks out of the loop, do we never unlock the mutex?

> +
> +             if (signal_pending(current)) {
> +                     ret = -EINTR;
> +                     break;
> +             }
> +
> +             if (!timeout) {
> +                     ret = -ETIME;
> +                     break;
> +             }
> +
> +             mutex_unlock(&hwmon->hwmon_lock);

Do we need to hold the lock around the signal_pending and timeout checks
as well? Or would wrapping it only around the hwmon->ddat access be
enough?

> +
> +             timeout = schedule_timeout(timeout);
>       }
> +     finish_wait(&ddat->waitq, &wait);
> +     if (ret)
> +             goto unlock;
> +
>       wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
>  
>       /* Disable PL1 limit and verify, because the limit cannot be disabled 
> on all platforms */
> @@ -508,6 +534,7 @@ void i915_hwmon_power_max_restore(struct drm_i915_private 
> *i915, bool old)
>       intel_uncore_rmw(hwmon->ddat.uncore, hwmon->rg.pkg_rapl_limit,
>                        PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
>       hwmon->ddat.reset_in_progress = false;
> +     wake_up_all(&hwmon->ddat.waitq);
>  
>       mutex_unlock(&hwmon->hwmon_lock);
>  }
> @@ -784,6 +811,7 @@ void i915_hwmon_register(struct drm_i915_private *i915)
>       ddat->uncore = &i915->uncore;
>       snprintf(ddat->name, sizeof(ddat->name), "i915");
>       ddat->gt_n = -1;
> +     init_waitqueue_head(&ddat->waitq);
>  
>       for_each_gt(gt, i915, i) {
>               ddat_gt = hwmon->ddat_gt + i;
> -- 
> 2.38.0
> 

Reply via email to