On Tue, Nov 15, 2016 at 04:36:35PM +0200, Mika Kuoppala wrote:
> If we have a bad client submitting unfavourably across different
> contexts, creating new ones, the per context scoring of badness
> doesn't remove the root cause, the offending client.
> To counter, keep track of per client context bans. Deny access if
> client is responsible for more than 3 context bans in
> it's lifetime.
> 
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  4 ++++
>  drivers/gpu/drm/i915/i915_gem.c            | 28 +++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_context.c    | 16 ++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 ++++++--
>  drivers/gpu/drm/i915/i915_gpu_error.c      | 10 ++++++----
>  5 files changed, 53 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5af1c38..92c5284 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -411,6 +411,9 @@ struct drm_i915_file_private {
>       } rps;
>  
>       unsigned int bsd_engine;
> +
> +#define I915_MAX_CLIENT_CONTEXT_BANS 3
> +     int context_bans;

3 feels a little too small. But possibly ok since this is at the context
level. Still webgl...

>  };
>  
>  /* Used by dp and fdi links */
> @@ -854,6 +857,7 @@ struct drm_i915_error_state {
>  
>               pid_t pid;
>               char comm[TASK_COMM_LEN];
> +             int context_bans;
>       } engine[I915_NUM_ENGINES];
>  
>       struct drm_i915_error_buffer {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 40a9e10..f56230f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2636,19 +2636,33 @@ static bool i915_context_is_banned(const struct 
> i915_gem_context *ctx)
>       return false;
>  }
>  
> -static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_guilty(struct drm_i915_gem_request 
> *request)
>  {
> -     struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> +     struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>  
>       hs->ban_score += 10;
>  
> -     hs->banned = i915_context_is_banned(ctx);
> +     hs->banned = i915_context_is_banned(request->ctx);
>       hs->batch_active++;
> +
> +     DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
> +                      request->ctx->name, hs->ban_score, yesno(hs->banned));
> +
> +     if (!request->file_priv)
> +             return;
> +
> +     if (hs->banned) {
> +             request->file_priv->context_bans++;
> +
> +             DRM_DEBUG_DRIVER("client %s has has %d context banned\n",
> +                              request->ctx->name,
> +                              request->file_priv->context_bans);
> +     }
>  }
>  
> -static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
> +static void i915_gem_request_mark_innocent(struct drm_i915_gem_request 
> *request)
>  {
> -     struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
> +     struct i915_ctx_hang_stats *hs = &request->ctx->hang_stats;
>  
>       hs->batch_pending++;
>  }
> @@ -2719,9 +2733,9 @@ static void i915_gem_reset_engine(struct 
> intel_engine_cs *engine)
>       }
>  
>       if (ring_hung)
> -             i915_gem_context_mark_guilty(request->ctx);
> +             i915_gem_request_mark_guilty(request);
>       else
> -             i915_gem_context_mark_innocent(request->ctx);
> +             i915_gem_request_mark_innocent(request);

Why? Just use the file_priv on the ctx.

> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 9abaae4..4ddd044 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -347,6 +347,14 @@ __create_hw_context(struct drm_device *dev,
>       return ERR_PTR(ret);
>  }
>  
> +static bool client_is_banned(struct drm_i915_file_private *file_priv)
> +{
> +     if (file_priv->context_bans <= I915_MAX_CLIENT_CONTEXT_BANS)
> +             return false;

        return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
> +
> +     return true;
> +}
> +
>  /**
>   * The default context needs to exist per ring that uses contexts. It stores 
> the
>   * context state of the GPU for applications that don't utilize HW contexts, 
> as
> @@ -360,6 +368,14 @@ i915_gem_create_context(struct drm_device *dev,
>  
>       lockdep_assert_held(&dev->struct_mutex);
>  
> +     if (file_priv && client_is_banned(file_priv)) {

So this belongs to context_craate_ioctl

> +             DRM_DEBUG("client %s[%d] banned from creating ctx\n",
> +                       current->comm,
> +                       pid_nr(get_task_pid(current, PIDTYPE_PID)));
> +
> +             return ERR_PTR(-EIO);
> +     }
> +
>       ctx = __create_hw_context(dev, file_priv);
>       if (IS_ERR(ctx))
>               return ctx;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e804cb2..21beaf0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1231,16 +1231,20 @@ static struct i915_gem_context *
>  i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
>                         struct intel_engine_cs *engine, const u32 ctx_id)
>  {
> +     struct drm_i915_file_private *file_priv = file->driver_priv;
>       struct i915_gem_context *ctx;
>       struct i915_ctx_hang_stats *hs;
>  
> -     ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
> +     ctx = i915_gem_context_lookup(file_priv, ctx_id);
>       if (IS_ERR(ctx))
>               return ctx;
>  
>       hs = &ctx->hang_stats;
>       if (hs->banned) {
> -             DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
> +             DRM_DEBUG("Client %s banned from submitting (%d:%d)\n",
> +                       ctx->name,
> +                       file_priv->context_bans,
> +                       hs->ban_score);

Context bans prevents creating new contexts. What is its relevance here?

(We should start using pr_debug for these user aides, at least something
more suitable than DRM_DEBUG).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to