Le 19/08/24 - 16:56, Harry Wentland a écrit :
> We add two 3x4 matrices into the VKMS color pipeline. The reason
> we're adding matrices is so that we can test that application
> of a matrix and its inverse yields an output equal to the input
> image.
> 
> One complication with the matrix implementation has to do with
> the fact that the matrix entries are in signed-magnitude fixed
> point, whereas the drm_fixed.h implementation uses 2s-complement.
> The latter one is the one that we want for easy addition and
> subtraction, so we convert all entries to 2s-complement.

Is there a reason to use signed-magnitude and not 2s-complement here? I 
did not read the whole amd driver, but it seems that the matrix is always 
converted to fixed point notation (amdgpu_dm_fixpt_from_s3132 in 
amdgpu_dm_color.c). It may reduce the complexity here and in the amd 
driver too.

> 
> Signed-off-by: Harry Wentland <harry.wentl...@amd.com>
> ---
>  drivers/gpu/drm/vkms/vkms_colorop.c  | 32 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/vkms/vkms_composer.c | 27 +++++++++++++++++++++++
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/vkms/vkms_colorop.c 
> b/drivers/gpu/drm/vkms/vkms_colorop.c
> index f61dfde47156..adcb08153a09 100644
> --- a/drivers/gpu/drm/vkms/vkms_colorop.c
> +++ b/drivers/gpu/drm/vkms/vkms_colorop.c
> @@ -37,7 +37,37 @@ static int vkms_initialize_color_pipeline(struct drm_plane 
> *plane, struct drm_pr
>  
>       prev_op = op;
>  
> -     /* 2nd op: 1d curve */
> +     /* 2nd op: 3x4 matrix */
> +     op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> +     if (!op) {
> +             DRM_ERROR("KMS: Failed to allocate colorop\n");
> +             return -ENOMEM;
> +     }

Same as before, don't we leak memory/properties here?

> +     ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> +     if (ret)
> +             return ret;
> +
> +     drm_colorop_set_next_property(prev_op, op);
> +
> +     prev_op = op;
> +
> +     /* 3rd op: 3x4 matrix */
> +     op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
> +     if (!op) {
> +             DRM_ERROR("KMS: Failed to allocate colorop\n");
> +             return -ENOMEM;
> +     }
> +
> +     ret = drm_colorop_ctm_3x4_init(dev, op, plane);
> +     if (ret)
> +             return ret;
> +
> +     drm_colorop_set_next_property(prev_op, op);
> +
> +     prev_op = op;
> +
> +     /* 4th op: 1d curve */
>       op = kzalloc(sizeof(struct drm_colorop), GFP_KERNEL);
>       if (!op) {
>               DRM_ERROR("KMS: Failed to allocate colorop\n");
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
> b/drivers/gpu/drm/vkms/vkms_composer.c
> index 6e939d3a6d5c..bd1df06ced85 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,6 +164,30 @@ static void apply_lut(const struct vkms_crtc_state 
> *crtc_state, struct line_buff
>       }
>  }
>  
> +static void apply_3x4_matrix(struct pixel_argb_s32 *pixel, const struct 
> drm_color_ctm_3x4 *matrix)
> +{
> +     s64 rf, gf, bf;
> +
> +     rf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[0]), 
> drm_int2fixp(pixel->r)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[1]), 
> drm_int2fixp(pixel->g)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[2]), 
> drm_int2fixp(pixel->b)) +
> +          drm_sm2fixp(matrix->matrix[3]);
> +
> +     gf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[4]), 
> drm_int2fixp(pixel->r)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[5]), 
> drm_int2fixp(pixel->g)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[6]), 
> drm_int2fixp(pixel->b)) +
> +          drm_sm2fixp(matrix->matrix[7]);
> +
> +     bf = drm_fixp_mul(drm_sm2fixp(matrix->matrix[8]), 
> drm_int2fixp(pixel->r)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[9]), 
> drm_int2fixp(pixel->g)) +
> +          drm_fixp_mul(drm_sm2fixp(matrix->matrix[10]), 
> drm_int2fixp(pixel->b)) +
> +          drm_sm2fixp(matrix->matrix[11]);
> +
> +     pixel->r = drm_fixp2int(rf);
> +     pixel->g = drm_fixp2int(gf);
> +     pixel->b = drm_fixp2int(bf);

There is no need to round here, like done in [1]?

I don't know if the performance improvment is huge, bug maybe you can 
precompute drm_int2fixp(pixel->r/g/b)?

[1]: https://lore.kernel.org/all/20240802-yuv-v9-12-08a706669...@bootlin.com/

> +}
> +
>  static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop 
> *colorop)
>  {
>       struct drm_colorop_state *colorop_state = colorop->state;
> @@ -184,6 +208,9 @@ static void apply_colorop(struct pixel_argb_s32 *pixel, 
> struct drm_colorop *colo
>                               DRM_DEBUG_DRIVER("unkown colorop 1D curve type 
> %d\n", colorop_state->curve_1d_type);
>                               break;
>               }
> +     } else if (colorop->type == DRM_COLOROP_CTM_3X4) {
> +             if (colorop_state->data)
> +                     apply_3x4_matrix(pixel, (struct drm_color_ctm_3x4 *) 
> colorop_state->data->data);
>       }
>  
>  }
> -- 
> 2.46.0
> 

Reply via email to