--- libavcodec/ffv1enc_vulkan.c | 268 ++++++++++++++++++++- libavcodec/vulkan/Makefile | 4 +- libavcodec/vulkan/ffv1_enc_rct_search.comp | 139 +++++++++++ libavcodec/vulkan/ffv1_enc_setup.comp | 16 ++ 4 files changed, 422 insertions(+), 5 deletions(-) create mode 100644 libavcodec/vulkan/ffv1_enc_rct_search.comp
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 3c1db9fd14..55b17f9784 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -36,6 +36,9 @@ #define LG_ALIGN_W 32 #define LG_ALIGN_H 32 +/* Level 4 and higher */ +#define RCT_MODEMAP_FMT AV_PIX_FMT_RGBA128 + typedef struct VulkanEncodeFFv1Context { FFV1Context ctx; @@ -48,6 +51,7 @@ typedef struct VulkanEncodeFFv1Context { FFVulkanShader setup; FFVulkanShader reset; + FFVulkanShader rct_search; FFVulkanShader rct; FFVulkanShader enc; @@ -73,6 +77,9 @@ typedef struct VulkanEncodeFFv1Context { /* Intermediate frame pool */ AVBufferRef *intermediate_frames_ref; + /* Frame pool for RCT mode images */ + AVBufferRef *rct_mode_frames_ref; + /* Representation mode */ enum FFVkShaderRepFormat rep_fmt; @@ -92,6 +99,7 @@ extern const char *ff_source_ffv1_common_comp; extern const char *ff_source_ffv1_reset_comp; extern const char *ff_source_ffv1_enc_common_comp; extern const char *ff_source_ffv1_enc_rct_comp; +extern const char *ff_source_ffv1_enc_rct_search_comp; extern const char *ff_source_ffv1_enc_vlc_comp; extern const char *ff_source_ffv1_enc_ac_comp; extern const char *ff_source_ffv1_enc_setup_comp; @@ -122,6 +130,7 @@ typedef struct FFv1VkParameters { int32_t sar[2]; uint32_t chroma_shift[2]; + int32_t modemap_basis[2]; uint32_t plane_state_size; uint32_t context_count; @@ -154,6 +163,7 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(0, ); GLSLC(1, ivec2 sar; ); GLSLC(1, uvec2 chroma_shift; ); + GLSLC(1, ivec2 modemap_basis; ); GLSLC(0, ); GLSLC(1, uint plane_state_size; ); GLSLC(1, uint context_count; ); @@ -179,6 +189,83 @@ static void add_push_data(FFVulkanShader *shd) VK_SHADER_STAGE_COMPUTE_BIT); } +static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, + AVFrame *enc_in, VkImageView *enc_in_views, + AVFrame **rct_modemap_frame, VkImageView *rct_modemap_views, + VkImageMemoryBarrier2 *img_bar, int *nb_img_bar) +{ + int err; + 
VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVulkanFunctions *vk = &fv->s.vkfn; + AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data; + FFv1VkRCTParameters pd; + + /* Create a temporaty frame */ + *rct_modemap_frame = av_frame_alloc(); + if (!(*rct_modemap_frame)) + return AVERROR(ENOMEM); + + RET(av_hwframe_get_buffer(fv->rct_mode_frames_ref, + *rct_modemap_frame, 0)); + RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *rct_modemap_frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(&fv->s, exec, rct_modemap_views, + *rct_modemap_frame, + FF_VK_REP_UINT)); + ff_vk_frame_barrier(&fv->s, exec, *rct_modemap_frame, img_bar, nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Update descriptors */ + ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct_search, + enc_in, enc_in_views, + 1, 0, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct_search, + *rct_modemap_frame, rct_modemap_views, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = *nb_img_bar, + }); + + /* Run the shader */ + ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct_search); + pd = (FFv1VkRCTParameters) { + .offset = 1 << f->bits_per_raw_sample, + .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) && + (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1), + .transparency = f->transparency, + }; + ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct_search, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, + (*rct_modemap_frame)->width, + (*rct_modemap_frame)->height, 1); + + 
ff_vk_frame_barrier(&fv->s, exec, *rct_modemap_frame, img_bar, nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + +fail: + return err; +} + static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec, AVFrame *enc_in, VkImageView *enc_in_views, AVFrame **intermediate_frame, VkImageView *intermediate_views, @@ -285,6 +372,7 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, FFv1VkParameters pd; AVFrame *intermediate_frame = NULL; + AVFrame *rct_modemap_frame = NULL; /* Temporary data */ size_t tmp_data_size; @@ -317,6 +405,7 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, VkImageView in_views[AV_NUM_DATA_POINTERS]; VkImageView intermediate_views[AV_NUM_DATA_POINTERS]; + VkImageView rct_modemap_views[AV_NUM_DATA_POINTERS]; AVFrame *enc_in = (AVFrame *)pict; VkImageView *enc_in_views = in_views; @@ -475,6 +564,19 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, }; } + if (fv->is_rgb && f->version >= 4) { + RET(run_rct_search(avctx, exec, + enc_in, enc_in_views, + &rct_modemap_frame, rct_modemap_views, + img_bar, &nb_img_bar)); + + ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup, + rct_modemap_frame, rct_modemap_views, + 1, 2, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + } + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .pImageMemoryBarriers = img_bar, @@ -501,6 +603,8 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, .sar[1] = pict->sample_aspect_ratio.den, .chroma_shift[0] = f->chroma_h_shift, .chroma_shift[1] = f->chroma_v_shift, + .modemap_basis[0] = fv->rct_search.lg_size[0], + .modemap_basis[1] = fv->rct_search.lg_size[1], .plane_state_size = plane_state_size, .context_count = context_count, .crcref = f->crcref, @@ -652,6 +756,7 @@ static int 
vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, /* We need the encoded data immediately */ ff_vk_exec_wait(&fv->s, exec); av_frame_free(&intermediate_frame); + av_frame_free(&rct_modemap_frame); /* Invalidate slice/output data if needed */ if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { @@ -741,6 +846,7 @@ fail: /* Frames added as a dep are always referenced, so we only need to * clean this up. */ av_frame_free(&intermediate_frame); + av_frame_free(&rct_modemap_frame); return 0; } @@ -752,6 +858,10 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) AVHWFramesContext *frames_ctx; AVVulkanFramesContext *vk_frames; + int subgroup_size = fv->s.props_11.subgroupSize; + int lg_rows = fv->s.props.properties.limits.maxComputeWorkGroupInvocations / + subgroup_size; + fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref); if (!fv->intermediate_frames_ref) return AVERROR(ENOMEM); @@ -759,8 +869,8 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; frames_ctx->format = AV_PIX_FMT_VULKAN; frames_ctx->sw_format = sw_format; - frames_ctx->width = FFALIGN(fv->s.frames->width, 32); - frames_ctx->height = FFALIGN(fv->s.frames->height, 32); + frames_ctx->width = FFALIGN(fv->s.frames->width, FFMAX(subgroup_size, 32)); + frames_ctx->height = FFALIGN(fv->s.frames->height, FFMAX(lg_rows, 32)); vk_frames = frames_ctx->hwctx; vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; @@ -826,6 +936,39 @@ end: return fmt; } +static int init_modemap(AVCodecContext *avctx, enum AVPixelFormat sw_format, + int lg_size0, int lg_size1) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + AVHWFramesContext *frames_ctx; + AVVulkanFramesContext *vk_frames; + + fv->rct_mode_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref); + if (!fv->rct_mode_frames_ref) + return AVERROR(ENOMEM); + + frames_ctx = (AVHWFramesContext 
*)fv->rct_mode_frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = (FFALIGN(fv->s.frames->width, lg_size0)/lg_size0) + 20; + frames_ctx->height = (FFALIGN(fv->s.frames->height, lg_size1)/lg_size1) + 20; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT; + + err = av_hwframe_ctx_init(fv->rct_mode_frames_ref); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize modemap pool with format %s: %s\n", + av_get_pix_fmt_name(sw_format), av_err2str(err)); + av_buffer_unref(&fv->rct_mode_frames_ref); + return err; + } + + return 0; +} + static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) { VulkanEncodeFFv1Context *fv = avctx->priv_data; @@ -912,8 +1055,18 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) .mem_quali = "readonly", .stages = VK_SHADER_STAGE_COMPUTE_BIT, }, + { + .name = "modemap", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(RCT_MODEMAP_FMT, + FF_VK_REP_UINT), + .elems = av_pix_fmt_count_planes(RCT_MODEMAP_FMT), + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); add_push_data(shd); @@ -1013,6 +1166,105 @@ fail: return err; } +static int init_rct_search_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFVulkanShader *shd = &fv->rct_search; + FFVulkanDescriptorSetBinding *desc_set; + int subgroup_size = fv->s.props_11.subgroupSize; + int lg_rows = fv->s.props.properties.limits.maxComputeWorkGroupInvocations / + subgroup_size; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + RET(init_modemap(avctx, RCT_MODEMAP_FMT, subgroup_size, lg_rows)); 
+ + RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct_search", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_null_initializer", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_arithmetic", + "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 5, + subgroup_size, lg_rows, 1, + 0)); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); + + define_shared_code(avctx, shd); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "src", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, + fv->rep_fmt), + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "modemap", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(RCT_MODEMAP_FMT, + FF_VK_REP_UINT), + .elems = av_pix_fmt_count_planes(RCT_MODEMAP_FMT), + .mem_quali = "writeonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, int offset; ); + GLSLC(1, uint8_t 
planar_rgb; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t padding[2]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + GLSLD(ff_source_ffv1_enc_rct_search_comp); + + RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) { int err; @@ -1506,6 +1758,14 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) /* Init RCT shader */ if (fv->is_rgb) { + if (f->version >= 4) { + err = init_rct_search_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + } + err = init_rct_shader(avctx, spv); if (err < 0) { spv->uninit(&spv); @@ -1548,9 +1808,11 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) ff_vk_shader_free(&fv->s, &fv->enc); ff_vk_shader_free(&fv->s, &fv->rct); + ff_vk_shader_free(&fv->s, &fv->rct_search); ff_vk_shader_free(&fv->s, &fv->reset); ff_vk_shader_free(&fv->s, &fv->setup); + av_buffer_unref(&fv->rct_mode_frames_ref); av_buffer_unref(&fv->intermediate_frames_ref); av_buffer_pool_uninit(&fv->results_data_pool); diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 351332ee44..6b6eedda4d 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -6,8 +6,8 @@ clean:: OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/rangecoder.o vulkan/ffv1_vlc.o \ vulkan/ffv1_common.o vulkan/ffv1_reset.o \ - vulkan/ffv1_enc_common.o \ - vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \ + vulkan/ffv1_enc_common.o vulkan/ffv1_enc_setup.o \ + vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_rct_search.o \ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \ vulkan/ffv1_enc.o 
/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne <d...@lynne.ee>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Load the pixel at this invocation's global position, from either a packed
 * image (planar_rgb == 0) or from one image per plane, and reorder the
 * components for the transform below. */
ivec4 load_components(void)
{
    const ivec2 pos = ivec2(gl_GlobalInvocationID);

    if (planar_rgb == 0)
        return ivec4(imageLoad(src[0], pos));

    ivec4 pix;
    for (int i = 0; i < (3 + transparency); i++)
        pix[i] = int(imageLoad(src[i], pos)[0]);

    /* Swizzle out the difference */
    if (transparency > 0)
        return pix.brga;
    return pix.bgra;
}

/* Candidate coefficient pairs; must have exactly NUM_CHECKS entries */
#define NUM_CHECKS 15
const ivec2 rct_y_coeff[] = {
    ivec2(0, 0), //      4G

    ivec2(0, 1), //      3G +  B
    ivec2(1, 0), //  R + 3G
    ivec2(1, 1), //  R + 2G + B

    ivec2(0, 2), //      2G + 2B
    ivec2(2, 0), // 2R + 2G
    ivec2(2, 2), // 2R      + 2B

    ivec2(0, 3), //      1G + 3B
    ivec2(3, 0), // 3R + 1G

    ivec2(0, 4), //           4B
    ivec2(4, 0), // 4R

    ivec2(1, 2), //  R +  G + 2B
    ivec2(2, 1), // 2R +  G +  B

    ivec2(3, 1), // 3R      +  B
    ivec2(1, 3), //  R      + 3B
};

/* Transformed samples of the whole workgroup, offset by one in each dimension
 * so that every invocation has a left, top-left and top neighbour. Row 0 and
 * column 0 are never written and keep their zero initializer. */
shared ivec4 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { };

/* Apply the colour transform with the given coefficient pair */
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef)
{
    pix.b -= pix.g;
    pix.r -= pix.g;
    pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
    pix.b += offset;
    pix.r += offset;
    return pix;
}

/* Sum of absolute differences between cur and the prediction made from its
 * left (LL), top-left (TL) and top (TT) neighbours in pix_buf. */
uint get_dist(ivec4 cur)
{
    ivec4 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1];
    ivec4 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0];
    ivec4 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0];

    ivec4 pred = ivec4(predict(LL.r, ivec2(TL.r, TT.r)),
                       predict(LL.g, ivec2(TL.g, TT.g)),
                       predict(LL.b, ivec2(TL.b, TT.b)),
                       predict(LL.a, ivec2(TL.a, TT.a)));

    uvec4 c = abs(cur - pred);
    return c.r + c.g + c.b + c.a;
}

/* One partial score per workgroup row */
shared uint score_cols[gl_WorkGroupSize.y] = { };

/* Try every candidate coefficient pair on this workgroup's block and store
 * the pair with the lowest total prediction error, together with that error,
 * into the mode map at the workgroup's position. */
void coeff_rating(void)
{
    ivec4 pix = load_components();
    uint min_sum = 0xFFFFFFFF;
    int best_mode = 1;

    for (int i = 0; i < NUM_CHECKS; i++) {
        ivec4 tx_pix = transform_sample(pix, rct_y_coeff[i]);
        pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix;

        /* Neighbours are written by other invocations: wait until the whole
         * workgroup has stored its sample, not only for memory ordering. */
        memoryBarrierShared();
        barrier();

        uint dist = get_dist(tx_pix);

        /* Sum from all columns.
         * NOTE(review): assumes every workgroup row is exactly one subgroup
         * (the host sets the local size x to the subgroup size) - confirm. */
        uint col_sum = subgroupAdd(dist);

        /* The first invocation of EVERY row publishes that row's sum */
        if (gl_LocalInvocationID.x == 0)
            score_cols[gl_LocalInvocationID.y] = col_sum;

        /* All row sums must be in place before anyone reads them. This also
         * guarantees that nobody still reads pix_buf or score_cols from this
         * iteration once the next one overwrites them. */
        memoryBarrierShared();
        barrier();

        /* Sum row-wise */
        uint row_sum = 0;
        for (uint j = gl_LocalInvocationID.x; j < gl_WorkGroupSize.y; j += gl_WorkGroupSize.x)
            row_sum += score_cols[j];

        uint block_sum = subgroupAdd(row_sum);
        if (block_sum < min_sum) {
            min_sum = block_sum;
            best_mode = i;
        }
    }

    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0)
        imageStore(modemap[0], ivec2(gl_WorkGroupID),
                   uvec4(rct_y_coeff[best_mode].x,
                         rct_y_coeff[best_mode].y,
                         min_sum,
                         0));
}

void main(void)
{
    coeff_rating();
}
b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -53,6 +53,22 @@ void init_slice(out SliceContext sc, const uint slice_idx) sc.slice_rct_coef = ivec2(1, 1); sc.slice_coding_mode = int(force_pcm == 1); + if (version >= 4) { + ivec2 modemap_pos = sc.slice_pos / modemap_basis; + ivec2 modemap_end = (sc.slice_pos + sc.slice_dim) / modemap_basis; + + /* Pick the lowest one amongst all blocks within the image */ + uvec4 res = uvec4(1, 1, 0xFFFFFFFF, 0); + for (; modemap_pos.y < modemap_end.y; modemap_pos.y++) { + for (; modemap_pos.x < modemap_end.x; modemap_pos.x++) { + uvec4 tmp = imageLoad(modemap[0], modemap_pos); + if (tmp.z < res.z) + res = tmp; + } + } + sc.slice_rct_coef = ivec2(res.xy); + } + rac_init(sc.c, OFFBUF(u8buf, out_data, slice_idx * slice_size_max), slice_size_max); -- 2.45.2.753.g447d99e1c3b _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".