On Thu, Sep 11, 2025 at 08:15:44AM +0530, Ankit Nautiyal wrote: > Currently dsc/scaler prefill latencies are handled during watermark > calculations. With the optimized guardband, we need to compute the > latencies to find the minimum guardband that works for most cases. > Extract the helpers to compute these latencies, so that they can be used > while computing vrr guardband. > > While at it, put declarations in reverse xmas tree order for better > readability. > > v2: Initialize {h,v}scale_k to 0, and simplify the check in > intel_display_scaler_prefill_latency(). (Mitul) > v3: Move helpers from intel_display.c to intel_vrr.c as they are specific > to account for latencies to program vrr guardband. (Jani) > > Signed-off-by: Ankit Nautiyal <ankit.k.nauti...@intel.com> > Reviewed-by: Mitul Golani <mitulkumar.ajitkumar.gol...@intel.com> > --- > drivers/gpu/drm/i915/display/intel_vrr.c | 33 ++++++++++++++ > drivers/gpu/drm/i915/display/intel_vrr.h | 8 ++++ > drivers/gpu/drm/i915/display/skl_watermark.c | 47 +++++++++----------- > 3 files changed, 63 insertions(+), 25 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c > b/drivers/gpu/drm/i915/display/intel_vrr.c > index 266cf5e1859d..fd690de5b45b 100644 > --- a/drivers/gpu/drm/i915/display/intel_vrr.c > +++ b/drivers/gpu/drm/i915/display/intel_vrr.c > @@ -772,3 +772,36 @@ void intel_vrr_get_config(struct intel_crtc_state > *crtc_state) > if (crtc_state->vrr.enable) > crtc_state->mode_flags |= I915_MODE_FLAG_VRR; > } > + > +int intel_vrr_guardband_scaler_latency(int num_scaler_users, u64 hscale, u64 > vscale, > + int chroma_downscaling_factor, > + int cdclk_prefill_adjustment, > + int linetime) > +{ > + int scaler_prefill_latency; > + > + scaler_prefill_latency = 4 * linetime + > + DIV_ROUND_UP_ULL((4 * linetime * hscale * > vscale * > + chroma_downscaling_factor), > 1000000); > + > + scaler_prefill_latency *= cdclk_prefill_adjustment; > + > + return scaler_prefill_latency; > +} > + > +int 
intel_vrr_guardband_dsc_latency(int num_scaler_users, u64 *hscale, u64 > *vscale, > + int chroma_downscaling_factor, > + int cdclk_prefill_adjustment, > + int linetime) > +{ > + int dsc_prefill_latency; > + > + dsc_prefill_latency = DIV_ROUND_UP(15 * linetime * > chroma_downscaling_factor, 10); > + > + for (int i = 0; i < num_scaler_users; i++) > + dsc_prefill_latency = DIV_ROUND_UP_ULL(dsc_prefill_latency * > hscale[i] * vscale[i], > + 1000000); > + dsc_prefill_latency *= cdclk_prefill_adjustment; > + > + return dsc_prefill_latency; > +} > diff --git a/drivers/gpu/drm/i915/display/intel_vrr.h > b/drivers/gpu/drm/i915/display/intel_vrr.h > index 38bf9996b883..950041647e47 100644 > --- a/drivers/gpu/drm/i915/display/intel_vrr.h > +++ b/drivers/gpu/drm/i915/display/intel_vrr.h > @@ -41,5 +41,13 @@ void intel_vrr_transcoder_enable(const struct > intel_crtc_state *crtc_state); > void intel_vrr_transcoder_disable(const struct intel_crtc_state *crtc_state); > void intel_vrr_set_fixed_rr_timings(const struct intel_crtc_state > *crtc_state); > bool intel_vrr_always_use_vrr_tg(struct intel_display *display); > +int intel_vrr_guardband_scaler_latency(int num_scaler_users, u64 hscale, u64 > vscale, > + int chroma_downscaling_factor, > + int cdclk_prefill_adjustment, > + int linetime); > +int intel_vrr_guardband_dsc_latency(int num_scaler_users, u64 *hscale, u64 > *vscale, > + int chroma_downscaling_factor, > + int cdclk_prefill_adjustment, > + int linetime); > > #endif /* __INTEL_VRR_H__ */ > diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c > b/drivers/gpu/drm/i915/display/skl_watermark.c > index 3d322c39ce21..1ff2b4d9a35e 100644 > --- a/drivers/gpu/drm/i915/display/skl_watermark.c > +++ b/drivers/gpu/drm/i915/display/skl_watermark.c > @@ -28,6 +28,7 @@ > #include "intel_flipq.h" > #include "intel_pcode.h" > #include "intel_plane.h" > +#include "intel_vrr.h" > #include "intel_wm.h" > #include "skl_universal_plane_regs.h" > #include "skl_scaler.h" > @@ -2179,11 
+2180,12 @@ cdclk_prefill_adjustment(const struct > intel_crtc_state *crtc_state) > static int > dsc_prefill_latency(const struct intel_crtc_state *crtc_state, int linetime) > { > + const struct intel_crtc_scaler_state *scaler_state = > &crtc_state->scaler_state; > + int chroma_downscaling_factor = > skl_scaler_chroma_downscale_factor(crtc_state); > struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); > - const struct intel_crtc_scaler_state *scaler_state = > - &crtc_state->scaler_state; > int num_scaler_users = hweight32(scaler_state->scaler_users); > - int chroma_downscaling_factor = > skl_scaler_chroma_downscale_factor(crtc_state); > + u64 hscale_k[ARRAY_SIZE(scaler_state->scalers)]; > + u64 vscale_k[ARRAY_SIZE(scaler_state->scalers)]; > u32 dsc_prefill_latency = 0; > > if (!crtc_state->dsc.compression_enable || > @@ -2191,18 +2193,16 @@ dsc_prefill_latency(const struct intel_crtc_state > *crtc_state, int linetime) > num_scaler_users > crtc->num_scalers) > return dsc_prefill_latency; > > - dsc_prefill_latency = DIV_ROUND_UP(15 * linetime * > chroma_downscaling_factor, 10); > - > for (int i = 0; i < num_scaler_users; i++) { > - u64 hscale_k, vscale_k; > - > - hscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[i].hscale, 1000) >> 16); > - vscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[i].vscale, 1000) >> 16); > - dsc_prefill_latency = DIV_ROUND_UP_ULL(dsc_prefill_latency * > hscale_k * vscale_k, > - 1000000); > + hscale_k[i] = max(1000, > mul_u32_u32(scaler_state->scalers[i].hscale, 1000) >> 16); > + vscale_k[i] = max(1000, > mul_u32_u32(scaler_state->scalers[i].vscale, 1000) >> 16); > } > > - dsc_prefill_latency *= cdclk_prefill_adjustment(crtc_state); > + dsc_prefill_latency = > + intel_vrr_guardband_dsc_latency(num_scaler_users, hscale_k, > vscale_k, > + chroma_downscaling_factor, > + > cdclk_prefill_adjustment(crtc_state), > + linetime);
Calling VRR helpers from the non-VRR watermark code feels completely backwards. I suspect we should move these prefill latency calculations into the relevant scaler/dsc/etc. files instead. > > return dsc_prefill_latency; > } > @@ -2210,28 +2210,25 @@ dsc_prefill_latency(const struct intel_crtc_state > *crtc_state, int linetime) > static int > scaler_prefill_latency(const struct intel_crtc_state *crtc_state, int > linetime) > { > - const struct intel_crtc_scaler_state *scaler_state = > - &crtc_state->scaler_state; > + const struct intel_crtc_scaler_state *scaler_state = > &crtc_state->scaler_state; > + int chroma_downscaling_factor = > skl_scaler_chroma_downscale_factor(crtc_state); > int num_scaler_users = hweight32(scaler_state->scaler_users); > + u64 hscale_k = 0, vscale_k = 0; > int scaler_prefill_latency = 0; > > if (!num_scaler_users) > return scaler_prefill_latency; > > - scaler_prefill_latency = 4 * linetime; > - > if (num_scaler_users > 1) { > - u64 hscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[0].hscale, 1000) >> 16); > - u64 vscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[0].vscale, 1000) >> 16); > - int chroma_downscaling_factor = > skl_scaler_chroma_downscale_factor(crtc_state); > - int latency; > - > - latency = DIV_ROUND_UP_ULL((4 * linetime * hscale_k * vscale_k * > - chroma_downscaling_factor), > 1000000); > - scaler_prefill_latency += latency; > + hscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[0].hscale, 1000) >> 16); > + vscale_k = max(1000, > mul_u32_u32(scaler_state->scalers[0].vscale, 1000) >> 16); > } > > - scaler_prefill_latency *= cdclk_prefill_adjustment(crtc_state); > + scaler_prefill_latency = > + intel_vrr_guardband_scaler_latency(num_scaler_users, hscale_k, > vscale_k, > + chroma_downscaling_factor, > + > cdclk_prefill_adjustment(crtc_state), > + linetime); > > return scaler_prefill_latency; > } > -- > 2.45.2 -- Ville Syrjälä Intel