From: Michal Wajdeczko <michal.wajdec...@intel.com> [ Upstream commit 33f17e2cbd930a2a00eb007d9b241b6db010a880 ]
GuC firmware counts received VF configuration KLVs and may start validation of the complete VF config even if some resources where unprovisioned in the meantime, leading to unexpected errors like: $ echo 1 | sudo tee /sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/contexts_quota $ echo 0 | sudo tee /sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/contexts_quota $ echo 1 | sudo tee /sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/doorbells_quota $ echo 0 | sudo tee /sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/doorbells_quota $ echo 1 | sudo tee /sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/ggtt_quota tee: '/sys/kernel/debug/dri/0000:00:02.0/gt0/vf1/ggtt_quota': Input/output error To mitigate this problem trigger explicit VF config reset after unprovisioning any of the critical resources (GGTT, context or doorbell IDs) that GuC is monitoring. Signed-off-by: Michal Wajdeczko <michal.wajdec...@intel.com> Reviewed-by: MichaĆ Winiarski <michal.winiar...@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250129195947.764-3-michal.wajdec...@intel.com Signed-off-by: Sasha Levin <sas...@kernel.org> --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 37 +++++++++++++++++++--- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index c9ed996b9cb0c..786f0dba41437 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -323,6 +323,26 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) return err; } +static int pf_push_vf_cfg(struct xe_gt *gt, unsigned int vfid, bool reset) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (reset) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + + return err; +} + +static int pf_refresh_vf_cfg(struct xe_gt *gt, unsigned int vfid) +{ + return pf_push_vf_cfg(gt, vfid, true); +} + static u64 pf_get_ggtt_alignment(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -419,6 +439,10 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) return err; pf_release_vf_config_ggtt(gt, config); + + err = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(err)) + return err; } xe_gt_assert(gt, !xe_ggtt_node_allocated(config->ggtt_region)); @@ -744,6 +768,10 @@ static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctx return ret; pf_release_config_ctxs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_ctxs) @@ -1041,6 +1069,10 @@ static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) return ret; pf_release_config_dbs(gt, config); + + ret = pf_refresh_vf_cfg(gt, vfid); + if (unlikely(ret)) + return ret; } if (!num_dbs) @@ -2003,10 +2035,7 @@ int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh xe_gt_assert(gt, vfid); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); - if (refresh) - err = pf_send_vf_cfg_reset(gt, vfid); - if (!err) - err = pf_push_full_vf_config(gt, vfid); + err = pf_push_vf_cfg(gt, vfid, refresh); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); if (unlikely(err)) { -- 2.39.5