Import the bicubic filter fragment shader into the compositor, and modify it to take the video and destination dimensions as shader uniform input instead of hardcoding them. This will help enable correct handling of compositor features such as blending, rotation and additional overlays. It will also eliminate duplicated vertex setup code once the original implementation is removed.
v2: Addressed review comments by Sinclair Yeh.

Signed-off-by: Thomas Hellstrom <thellst...@vmware.com>
Reviewed-by: Sinclair Yeh <s...@vmware.com>
---
 src/gallium/auxiliary/vl/vl_compositor.c | 364 ++++++++++++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_compositor.h |  24 +++
 2 files changed, 387 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 693d685..c9e1613 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1,6 +1,8 @@
 /**************************************************************************
  *
  * Copyright 2009 Younes Manton.
+ * Copyright 2016 Nayan Deshmukh.
+ * Copyright 2016 VMWare Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -43,6 +45,8 @@
 
 #define MIN_DIRTY (0)
 #define MAX_DIRTY (1 << 15)
+/* Constant buffer index for the bicubic scaler */
+#define VL_BICUBIC_CB 1
 
 enum VS_OUTPUT
 {
@@ -424,9 +428,226 @@ create_frag_shader_rgba(struct vl_compositor *c)
    return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
+/**
+ * \brief Emit the instructions for a 1D cubic interpolation of four samples
+ *
+ * \param[in,out] shader The shader program the instructions are added to.
+ * \param[in] tex_a The first of the four samples.
+ * \param[in] tex_b The second sample.
+ * \param[in] tex_c The third sample.
+ * \param[in] tex_d The fourth sample.
+ * \param[in] t The interpolation parameter.
+ * \param[out] o_fragment The register that receives the result.
+ */
+static void
+create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
+                                      struct ureg_src tex_b, struct ureg_src tex_c,
+                                      struct ureg_src tex_d, struct ureg_src t,
+                                      struct ureg_dst o_fragment)
+{
+   struct ureg_dst temp[11];
+   struct ureg_dst t_2;
+   unsigned i;
+
+   for(i = 0; i < 11; ++i)
+      temp[i] = ureg_DECL_temporary(shader);
+   t_2 = ureg_DECL_temporary(shader);
+
+   /*
+    * |temp[0]|   |  0  2  0  0 |  |tex_a|
+    * |temp[1]| = | -1  0  1  0 |* |tex_b|
+    * |temp[2]|   |  2 -5  4 -1 |  |tex_c|
+    * |temp[3]|   | -1  3 -3  1 |  |tex_d|
+    */
+   ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f));
+
+   ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f));
+   ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[1]));
+
+   ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f));
+   ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f),
+            ureg_src(temp[2]));
+   ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f),
+            ureg_src(temp[2]));
+   ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f),
+            ureg_src(temp[2]));
+
+   ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f));
+   ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f),
+            ureg_src(temp[3]));
+   ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f),
+            ureg_src(temp[3]));
+   ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[3]));
+
+   /*
+    * t_2 = t*t
+    * o_fragment = 0.5*|1  t  t^2  t^3|*|temp[0]|
+    *                                   |temp[1]|
+    *                                   |temp[2]|
+    *                                   |temp[3]|
+    */
+
+   ureg_MUL(shader, t_2, t, t);
+   ureg_MUL(shader, temp[4], ureg_src(t_2), t);
+
+   ureg_MUL(shader, temp[4], ureg_src(temp[4]),
+            ureg_src(temp[3]));
+   ureg_MUL(shader, temp[5], ureg_src(t_2),
+            ureg_src(temp[2]));
+   ureg_MUL(shader, temp[6], t,
+            ureg_src(temp[1]));
+   ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[0]));
+   ureg_ADD(shader, temp[8], ureg_src(temp[4]),
+            ureg_src(temp[5]));
+   ureg_ADD(shader, temp[9], ureg_src(temp[6]),
+            ureg_src(temp[7]));
+
+   ureg_ADD(shader, temp[10], ureg_src(temp[8]),
+            ureg_src(temp[9]));
+   ureg_MUL(shader, o_fragment, ureg_src(temp[10]),
+            ureg_imm1f(shader, 0.5f));
+
+
+   for(i = 0; i < 11; ++i)
+      ureg_release_temporary(shader, temp[i]);
+   ureg_release_temporary(shader, t_2);
+}
+
+/**
+ * \brief Create the bicubic interpolation fragment shader
+ *
+ * \param[in,out] c The compositor.
+ * \return A pointer to the shader, or NULL if creation failed.
+ */
+static void *
+create_frag_shader_bicubic_rgba(struct vl_compositor *c)
+{
+   struct pipe_screen *screen = c->pipe->screen;
+   struct ureg_program *shader;
+   struct ureg_src i_vtex, vtex;
+   struct ureg_src sampler;
+   struct ureg_src half_pixel;
+   struct ureg_src video_size;
+   struct ureg_dst t_array[23];
+   struct ureg_dst o_fragment;
+   struct ureg_dst t;
+   unsigned i;
+
+   if (screen->get_shader_param(
+          screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) {
+
+      return NULL;
+   }
+
+   shader = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!shader) {
+      return NULL;
+   }
+
+   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX,
+                               TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT);
+
+   for (i = 0; i < 23; ++i)
+      t_array[i] = ureg_DECL_temporary(shader);
+   t = ureg_DECL_temporary(shader);
+
+   ureg_DECL_constant2D(shader, 0, VL_BICUBIC_FLOATS / 4 - 1, VL_BICUBIC_CB);
+   half_pixel = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+                                   VL_BICUBIC_CB);
+   video_size = ureg_swizzle(half_pixel, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W);
+   half_pixel = ureg_swizzle(half_pixel, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y);
+
+   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * temp = (i_vtex - (0.5/dst_size)) * i_size)
+    * t = frac(temp)
+    * vtex = floor(i_vtex)/i_size
+    * (FIXME: The code actually computes
+    * vtex = floor(temp)/i_size + 0.5/dst_size
+    * Need to figure out why.)
+    */
+   ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
+            i_vtex, ureg_negate(half_pixel));
+   ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[21]), video_size);
+   ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+
+   ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+   ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), video_size);
+   ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), half_pixel);
+
+   /*
+    * t_array[0..*] = vtex + offset[0..*]
+    * t_array[0..*] = tex(t_array[0..*], sampler)
+    * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
+    * o_fragment = cubic_interpolate(t_array[16..19], t_y)
+    */
+   vtex = ureg_src(t_array[22]);
+   for (i = 0; i < VL_BICUBIC_OFFSETS; i += 2) {
+      struct ureg_src offset = ureg_src_dimension(
+         ureg_src_register(TGSI_FILE_CONSTANT, 1 + i/2), VL_BICUBIC_CB);
+
+      ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+      ureg_ADD(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                                  TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W));
+      ureg_MOV(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+   }
+
+   for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+      ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
+   }
+
+   for(i = 0; i < 4; ++i)
+      create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
+                                            ureg_src(t_array[4*i+1]),
+                                            ureg_src(t_array[4*i+2]),
+                                            ureg_src(t_array[4*i+3]),
+                                            ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X),
+                                            t_array[16+i]);
+
+   create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
+                                         ureg_src(t_array[17]),
+                                         ureg_src(t_array[18]),
+                                         ureg_src(t_array[19]),
+                                         ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y),
+                                         o_fragment);
+
+   for(i = 0; i < 23; ++i)
+      ureg_release_temporary(shader, t_array[i]);
+   ureg_release_temporary(shader, t);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+
 static bool
 init_shaders(struct vl_compositor *c)
 {
+   struct vertex2f *offsets;
    assert(c);
 
    c->vs = create_vert_shader(c);
@@ -472,6 +693,33 @@ init_shaders(struct vl_compositor *c)
       return false;
    }
 
+   c->bicubic.fs_rgba = create_frag_shader_bicubic_rgba(c);
+   if (!c->bicubic.fs_rgba) {
+      debug_printf("Unable to create bicubic fragment shader.\n");
+      return false;
+   }
+
+   offsets = c->bicubic.offsets;
+   offsets[0].x = -1.0f; offsets[0].y = -1.0f;
+   offsets[1].x = 0.0f; offsets[1].y = -1.0f;
+   offsets[2].x = 1.0f; offsets[2].y = -1.0f;
+   offsets[3].x = 2.0f; offsets[3].y = -1.0f;
+
+   offsets[4].x = -1.0f; offsets[4].y = 0.0f;
+   offsets[5].x = 0.0f; offsets[5].y = 0.0f;
+   offsets[6].x = 1.0f; offsets[6].y = 0.0f;
+   offsets[7].x = 2.0f; offsets[7].y = 0.0f;
+
+   offsets[8].x = -1.0f; offsets[8].y = 1.0f;
+   offsets[9].x = 0.0f; offsets[9].y = 1.0f;
+   offsets[10].x = 1.0f; offsets[10].y = 1.0f;
+   offsets[11].x = 2.0f; offsets[11].y = 1.0f;
+
+   offsets[12].x = -1.0f; offsets[12].y = 2.0f;
+   offsets[13].x = 0.0f; offsets[13].y = 2.0f;
+   offsets[14].x = 1.0f; offsets[14].y = 2.0f;
+   offsets[15].x = 2.0f; offsets[15].y = 2.0f;
+
    return true;
 }
 
@@ -487,6 +735,7 @@ static void cleanup_shaders(struct vl_compositor *c)
    c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv);
    c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb);
    c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+   c->pipe->delete_fs_state(c->pipe, c->bicubic.fs_rgba);
 }
 
 static bool
@@ -847,6 +1096,66 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
    u_upload_unmap(c->pipe->stream_uploader);
 }
 
+/**
+ * \brief Update and bind constant- / uniform buffers before rendering a layer
+ *
+ * \param[in,out] c The compositor.
+ * \param[in,out] s The compositor state bucket.
+ * \param[in,out] layer The layer about to be rendered.
+ */
+static void
+bind_constants(struct vl_compositor *c, struct vl_compositor_state *s,
+               struct vl_compositor_layer *layer)
+{
+   if (layer->fs == c->bicubic.fs_rgba) {
+      int i;
+      float constants[VL_BICUBIC_FLOATS];
+      float width = layer->sampler_views[0]->texture->width0;
+      float height = layer->sampler_views[0]->texture->height0;
+
+      memset(constants, 0, sizeof(constants));
+
+      constants[0] = 0.5f / layer->viewport.scale[0];
+      constants[1] = 0.5f / layer->viewport.scale[1];
+      constants[2] = width;
+      constants[3] = height;
+      for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+         constants[2 * (VL_BICUBIC_OTHER + i) + 0] =
+            c->bicubic.offsets[i].x / width;
+         constants[2 * (VL_BICUBIC_OTHER + i) + 1] =
+            c->bicubic.offsets[i].y / height;
+      }
+
+      /* Update the constant buffer only if it changed. */
+      if (memcmp(constants, s->bicubic.constants, sizeof(constants)) != 0) {
+         struct pipe_context *pipe = s->pipe;
+         struct pipe_box box;
+         struct pipe_transfer *transfer;
+         float *map;
+
+         u_box_1d(0, sizeof(constants), &box);
+         map = pipe->transfer_map(pipe, s->bicubic.constbuf, 0,
+                                  PIPE_TRANSFER_WRITE |
+                                  PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+                                  &box, &transfer);
+         if (map) {
+            memcpy(map, constants, sizeof(constants));
+            memcpy(s->bicubic.constants, constants, sizeof(constants));
+            pipe_transfer_unmap(pipe, transfer);
+         } else {
+            /*
+             * Upload fail will corrupt the output.
+             * Revert back to linear scaling
+             */
+            layer->fs = c->fs_rgba;
+            layer->samplers[0] = c->sampler_linear;
+         }
+      }
+      pipe_set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, VL_BICUBIC_CB,
+                               s->bicubic.constbuf);
+   }
+}
+
 static void
 draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
 {
@@ -860,7 +1169,7 @@ draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rec
          struct pipe_sampler_view **samplers = &layer->sampler_views[0];
          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
          void *blend = layer->blend ? layer->blend : i ? c->blend_add : c->blend_clear;
-
+         bind_constants(c, s, layer);
          c->pipe->bind_blend_state(c->pipe, blend);
          c->pipe->set_viewport_states(c->pipe, 0, 1, &layer->viewport);
          c->pipe->bind_fs_state(c->pipe, layer->fs);
@@ -1131,6 +1440,42 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
       s->layers[layer].colors[i] = colors[i];
 }
 
+/**
+ * \brief Set an RGBA layer which uses bicubic scaling.
+ *
+ * \param[in,out] s The compositor state bucket.
+ * \param[in,out] c The compositor.
+ * \param[in] layer The layer number.
+ * \param[in] rgba The sampler view of the RGBA texture to be sampled.
+ * \param[in] src_rect The source area rectangle, in source coordinates,
+ * of the area to be composited, or NULL if whole area.
+ * \param[in] dst_rect The destination rectangle, or NULL for the default.
+ *
+ */
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+                                     struct vl_compositor *c,
+                                     unsigned layer,
+                                     struct pipe_sampler_view *rgba,
+                                     struct u_rect *src_rect,
+                                     struct u_rect *dst_rect)
+{
+   assert(s && c && rgba);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   s->used_layers |= 1 << layer;
+   s->layers[layer].fs = c->bicubic.fs_rgba;
+   s->layers[layer].samplers[0] = c->sampler_nearest;
+   s->layers[layer].samplers[1] = NULL;
+   s->layers[layer].samplers[2] = NULL;
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[0], rgba);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[1], NULL);
+   pipe_sampler_view_reference(&s->layers[layer].sampler_views[2], NULL);
+   calc_src_and_dst(&s->layers[layer], rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(&s->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&s->layers[layer]));
+}
+
 void
 vl_compositor_set_layer_rotation(struct vl_compositor_state *s,
                                  unsigned layer,
@@ -1277,7 +1622,23 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
    if (!vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix, 1.0f, 0.0f))
       return false;
 
+   s->bicubic.constbuf = pipe_buffer_create(pipe->screen,
+                                            PIPE_BIND_CONSTANT_BUFFER,
+                                            PIPE_USAGE_DEFAULT,
+                                            VL_BICUBIC_FLOATS * sizeof(float));
+   if (!s->bicubic.constbuf)
+      goto out_bicubic;
+
+   memset(s->bicubic.constants, 0, sizeof(s->bicubic.constants));
+   pipe_buffer_write(s->pipe, s->bicubic.constbuf, 0,
+                     sizeof(s->bicubic.constants), s->bicubic.constants);
+
    return true;
+
+out_bicubic:
+   pipe_resource_reference(&s->csc_matrix, NULL);
+
+   return false;
 }
 
 void
@@ -1287,4 +1648,5 @@ vl_compositor_cleanup_state(struct vl_compositor_state *s)
 
    vl_compositor_clear_layers(s);
    pipe_resource_reference(&s->csc_matrix, NULL);
+   pipe_resource_reference(&s->bicubic.constbuf, NULL);
 }
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 535abb7..65acad1 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,6 +44,9 @@ struct pipe_context;
  */
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
+#define VL_BICUBIC_OFFSETS 16
+#define VL_BICUBIC_OTHER 2
+#define VL_BICUBIC_FLOATS ((VL_BICUBIC_OTHER + VL_BICUBIC_OFFSETS) * 2)
 
 /* deinterlace allgorithem */
 enum vl_compositor_deinterlace
@@ -94,6 +97,12 @@ struct vl_compositor_state
 
    unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
    struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
+   struct {
+      /** \brief Shadow copy of bicubic scaler fragment shader constants */
+      float constants[VL_BICUBIC_FLOATS];
+      /** \brief The bicubic scaler constant buffer */
+      struct pipe_resource *constbuf;
+   } bicubic;
 };
 
 struct vl_compositor
@@ -114,6 +123,12 @@ struct vl_compositor
    void *fs_video_buffer;
    void *fs_weave_rgb;
    void *fs_rgba;
+   struct {
+      /** \brief Pixel offsets for bicubic scaler */
+      struct vertex2f offsets[VL_BICUBIC_OFFSETS];
+      /** \brief Bicubic scaler fragment shader */
+      void *fs_rgba;
+   } bicubic;
 
    struct {
       void *y;
@@ -232,6 +247,15 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *state,
                              struct u_rect *dst_rect,
                              struct vertex4f *colors);
 
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+                                     struct vl_compositor *c,
+                                     unsigned layer,
+                                     struct pipe_sampler_view *rgba,
+                                     struct u_rect *src_rect,
+                                     struct u_rect *dst_rect);
+
+
 /**
  * set the layer rotation
  */
-- 
2.4.11

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev