On Mon, Sep 08, 2025 at 10:35:33AM +0300, Luca Coelho wrote: > There are currently two booleans to define three tiling modes, which > is bad practice because it allows representing an invalid mode. In > order to simplify this, convert these two booleans into one > enumeration with three possible tiling modes. > > Additionally, introduce the concept of Y "family" of tiling, which > groups Y, Yf and 4 tiling, since they're effectively treated in the > same way in the watermark calculations. Describe the grouping in the > enumeration definition. > > Signed-off-by: Luca Coelho <luciano.coe...@intel.com> > --- > drivers/gpu/drm/i915/display/skl_watermark.c | 35 ++++++++++++++------ > 1 file changed, 24 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c > b/drivers/gpu/drm/i915/display/skl_watermark.c > index 0ce3420a919e..dd4bed02c3c0 100644 > --- a/drivers/gpu/drm/i915/display/skl_watermark.c > +++ b/drivers/gpu/drm/i915/display/skl_watermark.c > @@ -53,9 +53,16 @@ struct intel_dbuf_state { > #define intel_atomic_get_new_dbuf_state(state) \ > to_intel_dbuf_state(intel_atomic_get_new_global_obj_state(state, > &to_intel_display(state)->dbuf.obj)) > > +/* Tiling mode groups relevant to WM calculations */ > +enum wm_tiling_mode { > + WM_TILING_LINEAR, > + WM_TILING_X_TILED, /* mostly like linear */
The _TILED suffix seems redundant here. > + WM_TILING_Y_FAMILY, /* includes Y, Yf and 4 tiling */ I don't really like the "y family" invention. Doesn't really unconfuse anything for the reader without going back to have a look at the comment. I think it would be better to just spell out each tilimg mode. So I guess something like "WM_TILING_Y_Yf_4" > +}; > + > /* Stores plane specific WM parameters */ > struct skl_wm_params { > - bool x_tiled, y_tiled; > + enum wm_tiling_mode tiling; That'll now be 4 bytes. > bool rc_surface; > bool is_planar; and we'll have a two byte hole here. > u32 width; u8 cpp; And there's a 3 byte hole already here after the cpp. Should group the u8 with the bools to avoid so many holes. We could also shrink y_min_scanlines to a u8 and stick it into the last 1 byte hole. That'd shrink the whole struct by 4 bytes. dbuf_block_size would also fit in a u16, but doesn't look like we have any other holes where we could stick it. Hmm, actually 'width' could probably also be shrunk to be a u16. So could get rid of another 4 bytes here if we really wanted to. But I suppose all that repacking should be a separate patch... > @@ -618,7 +625,8 @@ static unsigned int skl_wm_latency(struct intel_display > *display, int level, > display->platform.cometlake) && skl_watermark_ipc_enabled(display)) > latency += 4; > > - if (skl_needs_memory_bw_wa(display) && wp && wp->x_tiled) > + if (skl_needs_memory_bw_wa(display) && > + wp && wp->tiling == WM_TILING_X_TILED) > latency += 15; > > return latency; > @@ -1674,9 +1682,14 @@ skl_compute_wm_params(const struct intel_crtc_state > *crtc_state, > return -EINVAL; > } > > - wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; > - wp->y_tiled = modifier != I915_FORMAT_MOD_X_TILED && > - intel_fb_is_tiled_modifier(modifier); > + if (modifier == I915_FORMAT_MOD_X_TILED) > + wp->tiling = WM_TILING_X_TILED; > + else if (modifier != I915_FORMAT_MOD_X_TILED && The modifier check here is redundant with the if-else construct. > + intel_fb_is_tiled_modifier(modifier)) > + wp->tiling = WM_TILING_Y_FAMILY; > + else > + wp->tiling = WM_TILING_LINEAR; In fact we can avoid the entire intel_fb_is_tiled_modifier() call with something like: if (mod == LINEAR) tiling = LINEAR; else if (mod == X) tiling = X; else tiling = Y_Yf_4; The wm code always pops up fairly high in cpu profiles, so anything that makes it lighter is worth considering. > + > wp->rc_surface = intel_fb_is_ccs_modifier(modifier); > wp->is_planar = intel_format_info_is_yuv_semiplanar(format, modifier); > > @@ -1716,7 +1729,7 @@ skl_compute_wm_params(const struct intel_crtc_state > *crtc_state, > wp->y_min_scanlines *= 2; > > wp->plane_bytes_per_line = wp->width * wp->cpp; > - if (wp->y_tiled) { > + if (wp->tiling == WM_TILING_Y_FAMILY) { > interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line * > wp->y_min_scanlines, > wp->dbuf_block_size); > @@ -1732,7 +1745,7 @@ skl_compute_wm_params(const struct intel_crtc_state > *crtc_state, > interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, > wp->dbuf_block_size); > > - if (!wp->x_tiled || DISPLAY_VER(display) >= 10) > + if (wp->tiling != WM_TILING_X_TILED || DISPLAY_VER(display) >= > 10) > interm_pbpl++; > > wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); > @@ -1820,7 +1833,7 @@ static void skl_compute_plane_wm(const struct > intel_crtc_state *crtc_state, > latency, > wp->plane_blocks_per_line); > > - if (wp->y_tiled) { > + if (wp->tiling == WM_TILING_Y_FAMILY) { > selected_result = max_fixed16(method2, wp->y_tile_minimum); > } else { > if ((wp->cpp * crtc_state->hw.pipe_mode.crtc_htotal / > @@ -1870,7 +1883,7 @@ static void skl_compute_plane_wm(const struct > intel_crtc_state *crtc_state, > > /* Display WA #1126: skl,bxt,kbl */ > if (level >= 1 && level <= 7) { > - if (wp->y_tiled) { > + if (wp->tiling == WM_TILING_Y_FAMILY) { > blocks += > fixed16_to_u32_round_up(wp->y_tile_minimum); > lines += wp->y_min_scanlines; > } else { > @@ -1889,7 +1902,7 @@ static void skl_compute_plane_wm(const struct > intel_crtc_state *crtc_state, > } > > if (DISPLAY_VER(display) >= 11) { > - if (wp->y_tiled) { > + if (wp->tiling == WM_TILING_Y_FAMILY) { > int extra_lines; > > if (lines % wp->y_min_scanlines == 0) > @@ -2015,7 +2028,7 @@ static void skl_compute_transition_wm(struct > intel_display *display, > */ > wm0_blocks = wm0->blocks - 1; > > - if (wp->y_tiled) { > + if (wp->tiling == WM_TILING_Y_FAMILY) { > trans_y_tile_min = > (u16)mul_round_up_u32_fixed16(2, wp->y_tile_minimum); > blocks = max(wm0_blocks, trans_y_tile_min) + trans_offset; > -- > 2.50.1 -- Ville Syrjälä Intel