From: Dillon Varone <dillon.var...@amd.com>

[WHY]
When the effective bandwidth from the SoC is sufficient to perform SubVP
prefetches, DF throttling is not required.

[HOW]
Provide the SMU with the required clocks for which DF throttling is not
required.
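
As context: the new SMU message packs both hardmins into one 32-bit
parameter (see dcn401_smu_set_subvp_uclk_fclk_hardmin() in the diff
below; DALSMC_MSG_SubvpUclkFclk is defined locally as 0x1B until the
SMU interface header provides it):

        /* bits 15:0 carry uclk_freq_mhz, bits 31:16 carry fclk_freq_mhz */
        uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz;
        dcn401_smu_send_msg_with_param(clk_mgr,
                        DALSMC_MSG_SubvpUclkFclk, param, &response);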

Reviewed-by: Alvin Lee <alvin.l...@amd.com>
Signed-off-by: Dillon Varone <dillon.var...@amd.com>
Signed-off-by: Rodrigo Siqueira <rodrigo.sique...@amd.com>
---
 .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c        | 46 +++++++++++++++++++
 .../dc/clk_mgr/dcn401/dcn401_clk_mgr.h        |  1 +
 .../clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c   | 23 ++++++++++
 .../clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h   |  3 ++
 drivers/gpu/drm/amd/display/dc/dc.h           |  3 ++
 .../dc/dml2/dml21/dml21_translation_helper.c  |  2 +
 .../display/dc/dml2/dml21/inc/dml_top_types.h |  5 ++
 .../dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c | 43 +++++++++++++++++
 8 files changed, 126 insertions(+)
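
Note: the DPMM changes below compute the no-throttle minimums with the
same bandwidth-to-clock conversion already used for svp_prefetch, only
swapping in the system_active derates. A simplified sketch of that
derate step (field names as in the diff; the 80% figure is illustrative
only):

        /* an 80% derate inflates a 1000 MHz average requirement to 1250 MHz */
        min_uclk_avg = min_uclk_avg / (dram_derate_percent_pixel / 100.0);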

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index 8cfc5f435937..5b4e1e8a9ae2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -141,6 +141,20 @@ static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, P
        return ppclk_idle_dpm_enabled;
 }
 
+static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr)
+{
+       bool is_df_throttle_opt_enabled = false;
+
+       if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+                       clk_mgr->smu_ver >= 0x663500) {
+               is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle;
+       }
+
+       is_df_throttle_opt_enabled &= clk_mgr->smu_present;
+
+       return is_df_throttle_opt_enabled;
+}
+
 /* Query SMU for all clock states for a particular clock */
 static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
                unsigned int *num_levels)
@@ -869,6 +883,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned
                                        params->update_idle_hardmin_params.uclk_mhz,
                                        params->update_idle_hardmin_params.fclk_mhz);
                        break;
+               case CLK_MGR401_UPDATE_SUBVP_HARDMINS:
+                       dcn401_smu_set_subvp_uclk_fclk_hardmin(
+                                       clk_mgr_internal,
+                                       params->update_idle_hardmin_params.uclk_mhz,
+                                       params->update_idle_hardmin_params.fclk_mhz);
+                       break;
                case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK:
                        dcn401_smu_set_min_deep_sleep_dcef_clk(
                                        clk_mgr_internal,
@@ -945,15 +965,21 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
        bool update_active_uclk = false;
        bool update_idle_fclk = false;
        bool update_idle_uclk = false;
+       bool update_subvp_prefetch_dramclk = false;
+       bool update_subvp_prefetch_fclk = false;
        bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
                        dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) &&
                        dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
                        dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK);
+       bool is_df_throttle_opt_enabled = is_idle_dpm_enabled &&
+               dcn401_is_df_throttle_opt_enabled(clk_mgr_internal);
        int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
        int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
        int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
        int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
        int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+       int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+       int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
 
        unsigned int num_steps = 0;
 
@@ -1109,6 +1135,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                }
        }
 
+       if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) {
+               clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz;
+               update_subvp_prefetch_dramclk = true;
+               subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+       }
+
        /* FCLK */
        /* Always update saved value, even if new value not set due to P-State switching unsupported */
        if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
@@ -1129,6 +1161,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                }
        }
 
+       if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) {
+               clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz;
+               update_subvp_prefetch_fclk = true;
+               subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+       }
+
        /* When idle DPM is enabled, need to send active and idle hardmins separately */
        /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */
        if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) {
@@ -1146,6 +1184,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
                num_steps++;
        }
 
+       /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */
+       if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) {
+               block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz;
+               block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz;
+               block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS;
+               num_steps++;
+       }
+
        /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
        if (update_active_uclk || update_idle_uclk) {
                if (!is_idle_dpm_enabled) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
index 8b0461992b22..6c9ae5ca2c7e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
@@ -90,6 +90,7 @@ enum dcn401_clk_mgr_block_sequence_func {
        CLK_MGR401_UPDATE_DTBCLK_DTO,
        CLK_MGR401_UPDATE_DENTIST,
        CLK_MGR401_UPDATE_PSR_WAIT_LOOP,
+       CLK_MGR401_UPDATE_SUBVP_HARDMINS,
 };
 
 struct dcn401_clk_mgr_block_sequence {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
index 7700477d019b..b02a41179b41 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
@@ -21,6 +21,11 @@
 
 #define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
 
+/* temporary define */
+#ifndef DALSMC_MSG_SubvpUclkFclk
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#endif
+
 /*
  * Function to be used instead of REG_WAIT macro because the wait ends when
  * the register is NOT EQUAL to zero, and because the translation in msg_if.h
@@ -296,6 +301,24 @@ bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
        return success;
 }
 
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+               uint16_t uclk_freq_mhz,
+               uint16_t fclk_freq_mhz)
+{
+       uint32_t response = 0;
+       bool success;
+
+       /* 15:0 for uclk, 31:16 for fclk */
+       uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+       smu_print("SMU Set subvp hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+       success = dcn401_smu_send_msg_with_param(clk_mgr,
+                       DALSMC_MSG_SubvpUclkFclk, param, &response);
+
+       return success;
+}
+
 void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz)
 {
        smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
index 651fb8d62864..42cf7885a7cb 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
@@ -23,6 +23,9 @@ bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
 bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
                uint16_t uclk_freq_mhz,
                uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+               uint16_t uclk_freq_mhz,
+               uint16_t fclk_freq_mhz);
 void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
 void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 961b8245523c..aef70bcde355 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -629,6 +629,8 @@ struct dc_clocks {
        int bw_dispclk_khz;
        int idle_dramclk_khz;
        int idle_fclk_khz;
+       int subvp_prefetch_dramclk_khz;
+       int subvp_prefetch_fclk_khz;
 };
 
 struct dc_bw_validation_profile {
@@ -1072,6 +1074,7 @@ struct dc_debug_options {
        bool skip_full_updated_if_possible;
        unsigned int enable_oled_edp_power_up_opt;
        bool enable_hblank_borrow;
+       bool force_subvp_df_throttle;
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 730bf35e6043..efb099905496 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -1077,6 +1077,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state
        context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
        context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
        context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz;
+       context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
+       context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
 }
 
 void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
index b2ae6232673b..d2d053f2354d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@@ -387,6 +387,11 @@ struct dml2_display_cfg_programming {
                                unsigned long fclk_khz;
                                unsigned long dcfclk_khz;
                        } svp_prefetch;
+                       struct {
+                               unsigned long uclk_khz;
+                               unsigned long fclk_khz;
+                               unsigned long dcfclk_khz;
+                       } svp_prefetch_no_throttle;
 
                        unsigned long deepsleep_dcfclk_khz;
                        unsigned long dispclk_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
index 009026950b6c..8a78b9adfc62 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@@ -96,6 +96,7 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
        double min_uclk_latency;
        const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
 
+       /* assumes DF throttling is enabled */
        min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
        min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100);
 
@@ -125,6 +126,37 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
        in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
        in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
        in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+
+       /* assumes DF throttling is disabled */
+       min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+       min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100);
+
+       min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+       min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100);
+
+       min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+
+       min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+       min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100);
+
+       min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+       min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100);
+
+       min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+
+       min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+       min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100);
+
+       min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+       min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100);
+
+       min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+
+       get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+       in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
 }
 
 static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
@@ -272,6 +304,17 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr
        if (result)
                result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk);
 
+       /* these clocks are optional, so they can fail to map, in which case map all to 0 */
+       if (result) {
+               if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) ||
+                               !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) ||
+                               !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) {
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0;
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0;
+                       display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0;
+               }
+       }
+
        return result;
 }
 
-- 
2.45.2
