On 2022-07-08 01:26, Alex Deucher wrote:
> There are several things wrong here.  First, none of these
> numbers are floating point, so there is no need to cast to
> double.  Next, make sure to use the proper 64-bit division helpers.
> 
> Fixes: 85f4bc0c333c ("drm/amd/display: Add SubVP required code")
> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>

Reviewed-by: Harry Wentland <harry.wentl...@amd.com>

Harry

> ---
>  drivers/gpu/drm/amd/display/dc/Makefile      | 26 --------
>  drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 67 ++++++++++----------
>  2 files changed, 34 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/Makefile 
> b/drivers/gpu/drm/amd/display/dc/Makefile
> index dfe82bcdd17d..64f40b10c163 100644
> --- a/drivers/gpu/drm/amd/display/dc/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/Makefile
> @@ -22,31 +22,6 @@
>  #
>  # Makefile for Display Core (dc) component.
>  
> -ifdef CONFIG_X86
> -dmub_ccflags := -mhard-float -msse
> -endif
> -
> -ifdef CONFIG_PPC64
> -dmub_ccflags := -mhard-float -maltivec
> -endif
> -
> -ifdef CONFIG_CC_IS_GCC
> -ifeq ($(call cc-ifversion, -lt, 0701, y), y)
> -IS_OLD_GCC = 1
> -endif
> -endif
> -
> -ifdef CONFIG_X86
> -ifdef IS_OLD_GCC
> -# Stack alignment mismatch, proceed with caution.
> -# GCC < 7.1 cannot compile code using `double` and 
> -mpreferred-stack-boundary=3
> -# (8B stack alignment).
> -dmub_ccflags += -mpreferred-stack-boundary=4
> -else
> -dmub_ccflags += -msse2
> -endif
> -endif
> -
>  DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual
>  
>  ifdef CONFIG_DRM_AMD_DC_DCN
> @@ -99,7 +74,6 @@ AMD_DISPLAY_FILES += $(AMD_DISPLAY_CORE)
>  AMD_DISPLAY_FILES += $(AMD_DM_REG_UPDATE)
>  
>  DC_DMUB += dc_dmub_srv.o
> -CFLAGS_$(AMDDALPATH)/dc/dc_dmub_srv.o := $(dmub_ccflags)
>  DC_EDID += dc_edid_parser.o
>  AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
>  AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
> diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c 
> b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> index 6b446ae9e91f..832f7a4deb03 100644
> --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
> @@ -416,27 +416,28 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
>       pipe_data->pipe_config.vblank_data.drr_info.use_ramping = false; // for 
> now don't use ramping
>       pipe_data->pipe_config.vblank_data.drr_info.drr_window_size_ms = 4; // 
> hardcode 4ms DRR window for now
>  
> -     drr_frame_us = drr_timing->v_total * drr_timing->h_total /
> -                     (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
> +     drr_frame_us = div64_s64(drr_timing->v_total * drr_timing->h_total,
> +                              (int64_t)(drr_timing->pix_clk_100hz * 100) * 
> 1000000);
>       // P-State allow width and FW delays already included 
> phantom_timing->v_addressable
> -     mall_region_us = phantom_timing->v_addressable * 
> phantom_timing->h_total /
> -                     (double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
> +     mall_region_us = div64_s64(phantom_timing->v_addressable * 
> phantom_timing->h_total,
> +                                (int64_t)(phantom_timing->pix_clk_100hz * 
> 100) * 1000000);
>       min_drr_supported_us = drr_frame_us + mall_region_us + 
> SUBVP_DRR_MARGIN_US;
> -     min_vtotal_supported = drr_timing->pix_clk_100hz * 100 * 
> ((double)min_drr_supported_us / 1000000) /
> -                     (double)drr_timing->h_total;
> -
> -     prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) 
> * phantom_timing->h_total /
> -                     (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 
> +
> -                     dc->caps.subvp_prefetch_end_to_mall_start_us;
> -     subvp_active_us = main_timing->v_addressable * main_timing->h_total /
> -                     (double)(main_timing->pix_clk_100hz * 100) * 1000000;
> -     drr_active_us = drr_timing->v_addressable * drr_timing->h_total /
> -                     (double)(drr_timing->pix_clk_100hz * 100) * 1000000;
> -     max_drr_vblank_us = (double)(subvp_active_us - prefetch_us - 
> drr_active_us) / 2 + drr_active_us;
> +     min_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 *
> +                                      
> (div64_s64((int64_t)min_drr_supported_us, 1000000)),
> +                                      (int64_t)drr_timing->h_total);
> +
> +     prefetch_us = div64_s64((phantom_timing->v_total - 
> phantom_timing->v_front_porch) * phantom_timing->h_total,
> +                             (int64_t)(phantom_timing->pix_clk_100hz * 100) 
> * 1000000 +
> +                             dc->caps.subvp_prefetch_end_to_mall_start_us);
> +     subvp_active_us = div64_s64(main_timing->v_addressable * 
> main_timing->h_total,
> +                                 (int64_t)(main_timing->pix_clk_100hz * 100) 
> * 1000000);
> +     drr_active_us = div64_s64(drr_timing->v_addressable * 
> drr_timing->h_total,
> +                               (int64_t)(drr_timing->pix_clk_100hz * 100) * 
> 1000000);
> +     max_drr_vblank_us = div64_s64((int64_t)(subvp_active_us - prefetch_us - 
> drr_active_us), 2) + drr_active_us;
>       max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us;
>       max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? 
> max_drr_vblank_us : max_drr_mallregion_us;
> -     max_vtotal_supported = drr_timing->pix_clk_100hz * 100 * 
> ((double)max_drr_supported_us / 1000000) /
> -                     (double)drr_timing->h_total;
> +     max_vtotal_supported = div64_s64(drr_timing->pix_clk_100hz * 100 * 
> (div64_s64((int64_t)max_drr_supported_us, 1000000)),
> +                                      (int64_t)drr_timing->h_total);
>  
>       pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = 
> min_vtotal_supported;
>       pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = 
> max_vtotal_supported;
> @@ -530,10 +531,10 @@ static void 
> update_subvp_prefetch_end_to_mall_start(struct dc *dc,
>       struct dc_crtc_timing *phantom_timing1 = 
> &subvp_pipes[1]->stream->mall_stream_config.paired_stream->timing;
>       struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 *pipe_data = NULL;
>  
> -     subvp0_prefetch_us = (phantom_timing0->v_total - 
> phantom_timing0->v_front_porch) * phantom_timing0->h_total /
> -                             (double)(phantom_timing0->pix_clk_100hz * 100) 
> * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us;
> -     subvp1_prefetch_us = (phantom_timing1->v_total - 
> phantom_timing1->v_front_porch) * phantom_timing1->h_total /
> -                                     (double)(phantom_timing1->pix_clk_100hz 
> * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us;
> +     subvp0_prefetch_us = div64_s64((phantom_timing0->v_total - 
> phantom_timing0->v_front_porch) * phantom_timing0->h_total,
> +                                    (int64_t)(phantom_timing0->pix_clk_100hz 
> * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
> +     subvp1_prefetch_us = div64_s64((phantom_timing1->v_total - 
> phantom_timing1->v_front_porch) * phantom_timing1->h_total,
> +                                    (int64_t)(phantom_timing1->pix_clk_100hz 
> * 100) * 1000000 + dc->caps.subvp_prefetch_end_to_mall_start_us);
>  
>       // Whichever SubVP PIPE has the smaller prefetch (including the 
> prefetch end to mall start time)
>       // should increase it's prefetch time to match the other
> @@ -541,16 +542,16 @@ static void 
> update_subvp_prefetch_end_to_mall_start(struct dc *dc,
>               pipe_data = 
> &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[1];
>               prefetch_delta_us = subvp0_prefetch_us - subvp1_prefetch_us;
>               pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
> -                                     
> (((double)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) 
> / 1000000) *
> -                                     (phantom_timing1->pix_clk_100hz * 100) 
> + phantom_timing1->h_total - 1) /
> -                                     (double)phantom_timing1->h_total;
> +                     
> div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us 
> + prefetch_delta_us), 1000000)) *
> +                                (phantom_timing1->pix_clk_100hz * 100) + 
> phantom_timing1->h_total - 1),
> +                               (int64_t)phantom_timing1->h_total);
>       } else if (subvp1_prefetch_us >  subvp0_prefetch_us) {
>               pipe_data = 
> &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[0];
>               prefetch_delta_us = subvp1_prefetch_us - subvp0_prefetch_us;
>               pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
> -                                     
> (((double)(dc->caps.subvp_prefetch_end_to_mall_start_us + prefetch_delta_us) 
> / 1000000) *
> -                                     (phantom_timing0->pix_clk_100hz * 100) 
> + phantom_timing0->h_total - 1) /
> -                                     (double)phantom_timing0->h_total;
> +                     
> div64_s64(((div64_s64((int64_t)(dc->caps.subvp_prefetch_end_to_mall_start_us 
> + prefetch_delta_us), 1000000)) *
> +                                (phantom_timing0->pix_clk_100hz * 100) + 
> phantom_timing0->h_total - 1),
> +                               (int64_t)phantom_timing0->h_total);
>       }
>  }
>  
> @@ -601,13 +602,13 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
>  
>       // Round up
>       pipe_data->pipe_config.subvp_data.prefetch_to_mall_start_lines =
> -                     (((double)dc->caps.subvp_prefetch_end_to_mall_start_us 
> / 1000000) *
> -                     (phantom_timing->pix_clk_100hz * 100) + 
> phantom_timing->h_total - 1) /
> -                     (double)phantom_timing->h_total;
> +             
> div64_s64(((div64_s64((int64_t)dc->caps.subvp_prefetch_end_to_mall_start_us, 
> 1000000)) *
> +                        (phantom_timing->pix_clk_100hz * 100) + 
> phantom_timing->h_total - 1),
> +                       (int64_t)phantom_timing->h_total);
>       pipe_data->pipe_config.subvp_data.processing_delay_lines =
> -                     (((double)dc->caps.subvp_fw_processing_delay_us / 
> 1000000) *
> -                     (phantom_timing->pix_clk_100hz * 100) + 
> phantom_timing->h_total - 1) /
> -                     (double)phantom_timing->h_total;
> +             
> div64_s64(((div64_s64((int64_t)dc->caps.subvp_fw_processing_delay_us, 
> 1000000)) *
> +                        (phantom_timing->pix_clk_100hz * 100) + 
> phantom_timing->h_total - 1),
> +                       (int64_t)phantom_timing->h_total);
>       // Find phantom pipe index based on phantom stream
>       for (j = 0; j < dc->res_pool->pipe_count; j++) {
>               struct pipe_ctx *phantom_pipe = &context->res_ctx.pipe_ctx[j];

Reply via email to