On Fri, Jan 24, 2025 at 08:50:23AM +0200, Badal Nilawar wrote:
> The Forcewake timeout issue has been observed on Gen 12.0 and above. To 
> address this,
> disable Render Power-Gating (RPG) during live self-tests for these 
> generations.
> The temporary workaround 'drm/i915/mtl: do not enable render power-gating on 
> MTL'
> disables RPG globally, which is unnecessary since the issues were only seen 
> during self-tests.
> 
> v2: take runtime pm wakeref
> 
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9413
> Fixes: 25e7976db86b ("drm/i915/mtl: do not enable render power-gating on MTL")
> Cc: Rodrigo Vivi <rodrigo.v...@intel.com>
> Cc: Andi Shyti <andi.sh...@intel.com>
> Cc: Andrzej Hajda <andrzej.ha...@intel.com>
> Signed-off-by: Badal Nilawar <badal.nila...@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_rc6.c           | 19 ++++---------------
>  .../gpu/drm/i915/selftests/i915_selftest.c    | 18 ++++++++++++++++++
>  2 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c 
> b/drivers/gpu/drm/i915/gt/intel_rc6.c
> index 9378d5901c49..9ca42589da4d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
>                       GEN6_RC_CTL_RC6_ENABLE |
>                       GEN6_RC_CTL_EI_MODE(1);
>  
> -     /*
> -      * BSpec 52698 - Render powergating must be off.
> -      * FIXME BSpec is outdated, disabling powergating for MTL is just
> -      * temporary wa and should be removed after fixing real cause
> -      * of forcewake timeouts.
> -      */
> -     if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
> -             pg_enable =
> -                     GEN9_MEDIA_PG_ENABLE |
> -                     GEN11_MEDIA_SAMPLER_PG_ENABLE;
> -     else
> -             pg_enable =
> -                     GEN9_RENDER_PG_ENABLE |
> -                     GEN9_MEDIA_PG_ENABLE |
> -                     GEN11_MEDIA_SAMPLER_PG_ENABLE;
> +     pg_enable =
> +             GEN9_RENDER_PG_ENABLE |
> +             GEN9_MEDIA_PG_ENABLE |
> +             GEN11_MEDIA_SAMPLER_PG_ENABLE;
>  
>       if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) {
>               for (i = 0; i < I915_MAX_VCS; i++)
> diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c 
> b/drivers/gpu/drm/i915/selftests/i915_selftest.c
> index fee76c1d2f45..889281819c5b 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
> @@ -23,7 +23,9 @@
>  
>  #include <linux/random.h>
>  
> +#include "gt/intel_gt.h"
>  #include "gt/intel_gt_pm.h"
> +#include "gt/intel_gt_regs.h"
>  #include "gt/uc/intel_gsc_fw.h"
>  
>  #include "i915_driver.h"
> @@ -253,11 +255,27 @@ int i915_mock_selftests(void)
>  int i915_live_selftests(struct pci_dev *pdev)
>  {
>       struct drm_i915_private *i915 = pdev_to_i915(pdev);
> +     struct intel_uncore *uncore = &i915->uncore;
>       int err;
> +     u32 pg_enable;
> +     intel_wakeref_t wakeref;
>  
>       if (!i915_selftest.live)
>               return 0;
>  
> +     /*
> +      * FIXME Disable render powergating, this is temporary wa and should be 
> removed
> +      * after fixing real cause of forcewake timeouts.
> +      */
> +     with_intel_runtime_pm(uncore->rpm, wakeref) {

We probably need to take the runtime pm reference in upper layers...

CI is mad around runtime pm on selftests with this patch.

> +             if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 
> 74))) {
> +                     pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
> +                     if (pg_enable & GEN9_RENDER_PG_ENABLE)
> +                             intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
> +                                                   pg_enable & 
> ~GEN9_RENDER_PG_ENABLE);

We also are facing some repeated GPU hangs on selftests with this,
so we probably cannot blindly disable here?

I know our CI oscilates some time, but when different attempts gets
same results there, then we really need to believe it...

> +             }
> +     }
> +
>       __wait_gsc_proxy_completed(i915);
>       __wait_gsc_huc_load_completed(i915);
>  
> -- 
> 2.34.1
> 

Reply via email to