On Wed, 2022-11-09 at 09:48 +0000, Lin, Wayne wrote:
> [Public]
> 
> Thanks, Lyude!
> Comments inline.
> 
> > -----Original Message-----
> > From: Lyude Paul <ly...@redhat.com>
> > Sent: Saturday, November 5, 2022 7:59 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Wentland, Harry <harry.wentl...@amd.com>; sta...@vger.kernel.org;
> > Li, Sun peng (Leo) <sunpeng...@amd.com>; Siqueira, Rodrigo
> > <rodrigo.sique...@amd.com>; Deucher, Alexander
> > <alexander.deuc...@amd.com>; Koenig, Christian
> > <christian.koe...@amd.com>; Pan, Xinhui <xinhui....@amd.com>; David
> > Airlie <airl...@gmail.com>; Daniel Vetter <dan...@ffwll.ch>; Kazlauskas,
> > Nicholas <nicholas.kazlaus...@amd.com>; Pillai, Aurabindo
> > <aurabindo.pil...@amd.com>; Li, Roman <roman...@amd.com>; Zuo, Jerry
> > <jerry....@amd.com>; Wu, Hersen <hersenxs...@amd.com>; Lin, Wayne
> > <wayne....@amd.com>; Thomas Zimmermann <tzimmerm...@suse.de>;
> > Mahfooz, Hamza <hamza.mahf...@amd.com>; Hung, Alex
> > <alex.h...@amd.com>; Francis, David <david.fran...@amd.com>; Mikita
> > Lipski <mikita.lip...@amd.com>; Liu, Wenjing <wenjing....@amd.com>;
> > open list:DRM DRIVERS <dri-de...@lists.freedesktop.org>; open list <linux-
> > ker...@vger.kernel.org>
> > Subject: [PATCH 1/2] drm/amdgpu/mst: Stop ignoring error codes and
> > deadlocking
> > 
> > It appears that amdgpu makes the mistake of completely ignoring the return
> > values from the DP MST helpers, and instead just returns a simple
> > true/false. In this case, it seems to have come back to bite us: because
> > compute_mst_dsc_configs_for_state() simply returned false, amdgpu had no
> > way of telling when one of these helpers reported a deadlock. This could
> > definitely result in some kernel splats.
> > 
> > Signed-off-by: Lyude Paul <ly...@redhat.com>
> > Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share")
> > Cc: Harry Wentland <harry.wentl...@amd.com>
> > Cc: <sta...@vger.kernel.org> # v5.6+
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  18 +--
> >  .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 107 ++++++++++------
> > --
> >  .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  12 +-
> >  3 files changed, 73 insertions(+), 64 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index 0db2a88cd4d7b..6f76b2c84cdb5 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -6462,7 +6462,7 @@ static int
> > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
> >     struct drm_connector_state *new_con_state;
> >     struct amdgpu_dm_connector *aconnector;
> >     struct dm_connector_state *dm_conn_state;
> > -   int i, j;
> > +   int i, j, ret;
> >     int vcpi, pbn_div, pbn, slot_num = 0;
> > 
> >     for_each_new_connector_in_state(state, connector,
> > new_con_state, i) { @@ -6509,8 +6509,11 @@ static int
> > dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
> >                     dm_conn_state->pbn = pbn;
> >                     dm_conn_state->vcpi_slots = slot_num;
> > 
> > -                   drm_dp_mst_atomic_enable_dsc(state, aconnector-
> > > port, dm_conn_state->pbn,
> > -                                                false);
> > +                   ret = drm_dp_mst_atomic_enable_dsc(state,
> > aconnector->port,
> > +                                                      dm_conn_state-
> > > pbn, false);
> > +                   if (ret != 0)
> > +                           return ret;
> > +
> >                     continue;
> >             }
> > 
> > @@ -9523,10 +9526,9 @@ static int amdgpu_dm_atomic_check(struct
> > drm_device *dev,
> > 
> >  #if defined(CONFIG_DRM_AMD_DC_DCN)
> >     if (dc_resource_is_dsc_encoding_supported(dc)) {
> > -           if (!pre_validate_dsc(state, &dm_state, vars)) {
> > -                   ret = -EINVAL;
> > +           ret = pre_validate_dsc(state, &dm_state, vars);
> > +           if (ret != 0)
> >                     goto fail;
> > -           }
> >     }
> >  #endif
> > 
> > @@ -9621,9 +9623,9 @@ static int amdgpu_dm_atomic_check(struct
> > drm_device *dev,
> >             }
> > 
> >  #if defined(CONFIG_DRM_AMD_DC_DCN)
> > -           if (!compute_mst_dsc_configs_for_state(state, dm_state-
> > > context, vars)) {
> > +           ret = compute_mst_dsc_configs_for_state(state, dm_state-
> > > context, vars);
> > +           if (ret) {
> > 
> >     DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state()
> > failed\n");
> > -                   ret = -EINVAL;
> >                     goto fail;
> >             }
> > 
> > diff --git
> > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> > index 6ff96b4bdda5c..30bc2e5058b70 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> > +++
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
> > @@ -864,25 +864,25 @@ static bool try_disable_dsc(struct
> > drm_atomic_state *state,
> >     return true;
> >  }
> > 
> > -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state
> > *state,
> > -                                        struct dc_state *dc_state,
> > -                                        struct dc_link *dc_link,
> > -                                        struct dsc_mst_fairness_vars *vars,
> > -                                        struct drm_dp_mst_topology_mgr
> > *mgr,
> > -                                        int *link_vars_start_index)
> > +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state
> > *state,
> > +                                       struct dc_state *dc_state,
> > +                                       struct dc_link *dc_link,
> > +                                       struct dsc_mst_fairness_vars *vars,
> > +                                       struct drm_dp_mst_topology_mgr
> > *mgr,
> > +                                       int *link_vars_start_index)
> >  {
> >     struct dc_stream_state *stream;
> >     struct dsc_mst_fairness_params params[MAX_PIPES];
> >     struct amdgpu_dm_connector *aconnector;
> >     struct drm_dp_mst_topology_state *mst_state =
> > drm_atomic_get_mst_topology_state(state, mgr);
> >     int count = 0;
> > -   int i, k;
> > +   int i, k, ret;
> >     bool debugfs_overwrite = false;
> > 
> >     memset(params, 0, sizeof(params));
> > 
> >     if (IS_ERR(mst_state))
> > -           return false;
> > +           return PTR_ERR(mst_state);
> > 
> >     mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link);  #if
> > defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +933,7 @@ static bool
> > compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
> > 
> >     if (count == 0) {
> >             ASSERT(0);
> > -           return true;
> > +           return 0;
> >     }
> > 
> >     /* k is start index of vars for current phy link used by mst hub */ @@
> > -949,11 +949,14 @@ static bool compute_mst_dsc_configs_for_link(struct
> > drm_atomic_state *state,
> >             vars[i + k].bpp_x16 = 0;
> >             if (drm_dp_atomic_find_time_slots(state, params[i].port-
> > > mgr, params[i].port,
> >                                               vars[i + k].pbn) < 0)
> > -                   return false;
> > +                   return -EINVAL;
> 
> Should we also propagate the error code returned by
> drm_dp_atomic_find_time_slots() rather than assigning a new one here?

Yes we should, nice catch!

> 
> >     }
> > -   if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
> > +   ret = drm_dp_mst_atomic_check(state);
> > +   if (ret == 0 && !debugfs_overwrite) {
> >             set_dsc_configs_from_fairness_vars(params, vars, count, k);
> > -           return true;
> > +           return 0;
> > +   } else if (ret == -EDEADLK) {
> > +           return ret;
> 
> I think we should return here whenever there is an error, not just in the
> -EDEADLK case.

Sounds good to me.


> 
> >     }
> > 
> >     /* Try max compression */
> > @@ -964,29 +967,30 @@ static bool
> > compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
> >                     vars[i + k].bpp_x16 =
> > params[i].bw_range.min_target_bpp_x16;
> >                     if (drm_dp_atomic_find_time_slots(state,
> > params[i].port->mgr,
> >                                                       params[i].port, vars[i
> > + k].pbn) < 0)
> > -                           return false;
> > +                           return -EINVAL;
> 
> Same as above.
> 
> >             } else {
> >                     vars[i + k].pbn =
> > kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
> >                     vars[i + k].dsc_enabled = false;
> >                     vars[i + k].bpp_x16 = 0;
> >                     if (drm_dp_atomic_find_time_slots(state,
> > params[i].port->mgr,
> >                                                       params[i].port, vars[i
> > + k].pbn) < 0)
> > -                           return false;
> > +                           return -EINVAL;
> 
> Same as above.
> 
> >             }
> >     }
> > -   if (drm_dp_mst_atomic_check(state))
> > -           return false;
> > +   ret = drm_dp_mst_atomic_check(state);
> > +   if (ret != 0)
> > +           return ret;
> > 
> >     /* Optimize degree of compression */
> >     if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count,
> > k))
> > -           return false;
> > +           return -ENOSPC;
> > 
> >     if (!try_disable_dsc(state, dc_link, params, vars, count, k))
> > -           return false;
> > +           return -ENOSPC;
> > 
> >     set_dsc_configs_from_fairness_vars(params, vars, count, k);
> > 
> > -   return true;
> > +   return 0;
> >  }
> > 
> >  static bool is_dsc_need_re_compute(
> > @@ -1087,15 +1091,16 @@ static bool is_dsc_need_re_compute(
> >     return is_dsc_need_re_compute;
> >  }
> > 
> > -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
> > -                                  struct dc_state *dc_state,
> > -                                  struct dsc_mst_fairness_vars *vars)
> > +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
> > +                                 struct dc_state *dc_state,
> > +                                 struct dsc_mst_fairness_vars *vars)
> >  {
> >     int i, j;
> >     struct dc_stream_state *stream;
> >     bool computed_streams[MAX_PIPES];
> >     struct amdgpu_dm_connector *aconnector;
> >     int link_vars_start_index = 0;
> > +   int ret = 0;
> > 
> >     for (i = 0; i < dc_state->stream_count; i++)
> >             computed_streams[i] = false;
> > @@ -1118,17 +1123,19 @@ bool compute_mst_dsc_configs_for_state(struct
> > drm_atomic_state *state,
> >                     continue;
> > 
> >             if (dcn20_remove_stream_from_ctx(stream->ctx->dc,
> > dc_state, stream) != DC_OK)
> > -                   return false;
> > +                   return -EINVAL;
> > 
> >             if (!is_dsc_need_re_compute(state, dc_state, stream->link))
> >                     continue;
> > 
> >             mutex_lock(&aconnector->mst_mgr.lock);
> > -           if (!compute_mst_dsc_configs_for_link(state, dc_state,
> > stream->link, vars,
> > -                                                 &aconnector->mst_mgr,
> > -                                                 &link_vars_start_index)) {
> > +
> > +           ret = compute_mst_dsc_configs_for_link(state, dc_state,
> > stream->link, vars,
> > +                                                  &aconnector->mst_mgr,
> > +                                                  &link_vars_start_index);
> > +           if (ret != 0) {
> >                     mutex_unlock(&aconnector->mst_mgr.lock);
> > -                   return false;
> > +                   return ret;
> >             }
> >             mutex_unlock(&aconnector->mst_mgr.lock);
> > 
> > @@ -1143,22 +1150,22 @@ bool compute_mst_dsc_configs_for_state(struct
> > drm_atomic_state *state,
> > 
> >             if (stream->timing.flags.DSC == 1)
> >                     if (dc_stream_add_dsc_to_resource(stream->ctx-
> > > dc, dc_state, stream) != DC_OK)
> > -                           return false;
> > +                           return -EINVAL;
> >     }
> > 
> > -   return true;
> > +   return ret;
> >  }
> > 
> > -static bool
> > -   pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state
> > *state,
> > -                                         struct dc_state *dc_state,
> > -                                         struct dsc_mst_fairness_vars
> > *vars)
> > +static int pre_compute_mst_dsc_configs_for_state(struct
> > drm_atomic_state *state,
> > +                                            struct dc_state *dc_state,
> > +                                            struct dsc_mst_fairness_vars
> > *vars)
> >  {
> >     int i, j;
> >     struct dc_stream_state *stream;
> >     bool computed_streams[MAX_PIPES];
> >     struct amdgpu_dm_connector *aconnector;
> >     int link_vars_start_index = 0;
> > +   int ret;
> > 
> >     for (i = 0; i < dc_state->stream_count; i++)
> >             computed_streams[i] = false;
> > @@ -1184,13 +1191,12 @@ static bool
> >                     continue;
> > 
> >             mutex_lock(&aconnector->mst_mgr.lock);
> > -           if (!compute_mst_dsc_configs_for_link(state, dc_state,
> > stream->link, vars,
> > -                                                 &aconnector->mst_mgr,
> > -                                                 &link_vars_start_index)) {
> > -                   mutex_unlock(&aconnector->mst_mgr.lock);
> > -                   return false;
> > -           }
> > +           ret = compute_mst_dsc_configs_for_link(state, dc_state,
> > stream->link, vars,
> > +                                                  &aconnector->mst_mgr,
> > +                                                  &link_vars_start_index);
> >             mutex_unlock(&aconnector->mst_mgr.lock);
> > +           if (ret != 0)
> > +                   return ret;
> > 
> >             for (j = 0; j < dc_state->stream_count; j++) {
> >                     if (dc_state->streams[j]->link == stream->link) @@ -
> > 1198,7 +1204,7 @@ static bool
> >             }
> >     }
> > 
> > -   return true;
> > +   return ret;
> >  }
> > 
> >  static int find_crtc_index_in_state_by_stream(struct drm_atomic_state
> > *state, @@ -1253,9 +1259,9 @@ static bool
> > is_dsc_precompute_needed(struct drm_atomic_state *state)
> >     return ret;
> >  }
> > 
> > -bool pre_validate_dsc(struct drm_atomic_state *state,
> > -                 struct dm_atomic_state **dm_state_ptr,
> > -                 struct dsc_mst_fairness_vars *vars)
> > +int pre_validate_dsc(struct drm_atomic_state *state,
> > +                struct dm_atomic_state **dm_state_ptr,
> > +                struct dsc_mst_fairness_vars *vars)
> >  {
> >     int i;
> >     struct dm_atomic_state *dm_state;
> > @@ -1264,11 +1270,12 @@ bool pre_validate_dsc(struct drm_atomic_state
> > *state,
> > 
> >     if (!is_dsc_precompute_needed(state)) {
> >             DRM_INFO_ONCE("DSC precompute is not needed.\n");
> > -           return true;
> > +           return 0;
> >     }
> > -   if (dm_atomic_get_state(state, dm_state_ptr)) {
> > +   ret = dm_atomic_get_state(state, dm_state_ptr);
> > +   if (ret != 0) {
> >             DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
> > -           return false;
> > +           return ret;
> >     }
> >     dm_state = *dm_state_ptr;
> > 
> > @@ -1280,7 +1287,7 @@ bool pre_validate_dsc(struct drm_atomic_state
> > *state,
> > 
> >     local_dc_state = kmemdup(dm_state->context, sizeof(struct
> > dc_state), GFP_KERNEL);
> >     if (!local_dc_state)
> > -           return false;
> > +           return -ENOMEM;
> > 
> >     for (i = 0; i < local_dc_state->stream_count; i++) {
> >             struct dc_stream_state *stream = dm_state->context-
> > > streams[i]; @@ -1316,9 +1323,9 @@ bool pre_validate_dsc(struct
> > drm_atomic_state *state,
> >     if (ret != 0)
> >             goto clean_exit;
> > 
> > -   if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state,
> > vars)) {
> > +   ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state,
> > vars);
> > +   if (ret != 0) {
> > 
> >     DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state()
> > failed\n");
> > -           ret = -EINVAL;
> >             goto clean_exit;
> >     }
> > 
> > @@ -1349,7 +1356,7 @@ bool pre_validate_dsc(struct drm_atomic_state
> > *state,
> > 
> >     kfree(local_dc_state);
> > 
> > -   return (ret == 0);
> > +   return ret;
> >  }
> > 
> >  static unsigned int kbps_from_pbn(unsigned int pbn) diff --git
> > a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
> > index b92a7c5671aa2..97fd70df531bf 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
> > +++
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
> > @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars {
> >     struct amdgpu_dm_connector *aconnector;  };
> > 
> > -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
> > -                                  struct dc_state *dc_state,
> > -                                  struct dsc_mst_fairness_vars *vars);
> > +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
> > +                                 struct dc_state *dc_state,
> > +                                 struct dsc_mst_fairness_vars *vars);
> > 
> >  bool needs_dsc_aux_workaround(struct dc_link *link);
> > 
> > -bool pre_validate_dsc(struct drm_atomic_state *state,
> > -                 struct dm_atomic_state **dm_state_ptr,
> > -                 struct dsc_mst_fairness_vars *vars);
> > +int pre_validate_dsc(struct drm_atomic_state *state,
> > +                struct dm_atomic_state **dm_state_ptr,
> > +                struct dsc_mst_fairness_vars *vars);
> > 
> >  enum dc_status dm_dp_mst_is_port_support_mode(
> >     struct amdgpu_dm_connector *aconnector,
> > --
> > 2.37.3
> ---
> Regards,
> Wayne Lin
> 

-- 
Cheers,
 Lyude Paul (she/her)
 Software Engineer at Red Hat

Reply via email to