The branch, master has been updated
       via  d00f41f21347a93cbcf533dceccd7532bd925067 (commit)
      from  28461f2c4332e34cd50ed0cfd947b8ac1bc5fe08 (commit)


- Log -----------------------------------------------------------------
commit d00f41f21347a93cbcf533dceccd7532bd925067
Author:     averne <[email protected]>
AuthorDate: Sat Nov 8 19:57:37 2025 +0100
Commit:     Lynne <[email protected]>
CommitDate: Sat Nov 8 22:31:21 2025 +0000

    vulkan/prores: forward quantization parameter to the IDCT shader
    
    The qScale syntax element has a maximum value of 512, which would
    overflow the 16-bit store from the VLD shader in extreme cases.
    This fixes that edge case by forwarding the element in a storage
    buffer, and applying the inverse quantization fully in the IDCT shader.

diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..4b39b3d8ae 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -87,7 +87,7 @@ void main(void)
     uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
     bool act = gid.x < mb_width << (4 - chroma_shift);
 
-    /* Coalesced load of DCT coeffs in shared memory, second part of inverse 
quantization */
+    /* Coalesced load of DCT coeffs in shared memory, inverse quantization */
     if (act) {
         /**
          * According to spec indexing an array in push constant memory with
@@ -95,9 +95,14 @@ void main(void)
          * so copy the whole matrix locally.
          */
         uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+
+        /* Table 15 */
+        uint8_t qidx = quant_idx[(gid.y >> 1) * mb_width + (gid.x >> 4)];
+        int qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
+
         [[unroll]] for (uint i = 0; i < 8; ++i) {
-            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | 
i))), 16);
-            blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) + 
i))), 16);
+            blocks[block][i * 9 + idx] = float(v * qscale * int(qmat[(i << 3) 
+ idx]));
         }
     }
 
@@ -117,7 +122,7 @@ void main(void)
     if (act) {
         [[unroll]] for (uint i = 0; i < 8; ++i) {
             float v = blocks[block][i * 9 + idx] * fact + off;
-            put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, 
maxv));
+            put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0, 
maxv));
         }
     }
 }
diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp
index 00e78e08ff..298a5baf4c 100644
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -22,9 +22,9 @@
 void put_px(uint tex_idx, ivec2 pos, uint v)
 {
 #ifndef INTERLACED
-    imageStore(dst[tex_idx], pos, uvec4(v));
+    imageStore(dst[tex_idx], pos, uvec4(uint16_t(v)));
 #else
-    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), 
uvec4(v));
+    imageStore(dst[tex_idx], ivec2(pos.x, (pos.y << 1) + bottom_field), 
uvec4(uint16_t(v)));
 #endif
 }
 
@@ -57,7 +57,7 @@ uint decode_codeword(inout GetBitContext gb, int codebook)
     }
 }
 
-void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
 {
     uvec3 gid = gl_GlobalInvocationID;
     uint is_luma = uint(gid.z == 0);
@@ -70,7 +70,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
     {
         /* First coeff */
         uint c = to_signed(decode_codeword(gb, 0x650));
-        put_px(gid.z, base_pos, c * qscale & 0xffff);
+        put_px(gid.z, base_pos, c);
 
         /**
          * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | 
((kexp or kexp + 1) << 8)
@@ -89,7 +89,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
             int s = int(prev_dc_diff) >> 31;
             c += prev_dc_diff = (to_signed(cw) ^ s) - s;
 
-            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 
0xffff);
+            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c);
         }
     }
 
@@ -152,7 +152,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count, uint qscale)
             ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
 
             uint c = ((level + 1) ^ -s) + s;
-            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+            put_px(gid.z, base_pos + spos + bpos, c);
         }
     }
 }
@@ -218,7 +218,7 @@ void decode_alpha(in GetBitContext gb, uvec2 mb_pos, uint 
mb_count)
          */
         uint val = (alpha << alpha_rescale_lshift) | (alpha >> 
alpha_rescale_rshift);
         for (uint end = pos + run; pos < end; ++pos)
-            put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), 
val & 0xffff);
+            put_px(3, base_pos + ivec2(pos & block_mask, pos >> block_shift), 
val);
     }
 }
 
@@ -235,13 +235,8 @@ void main(void)
     u8buf bs = u8buf(slice_data + slice_off);
 
     /* Decode slice header */
-    uint hdr_size, y_size, u_size, v_size, a_size;
-    hdr_size = bs[0].v >> 3;
-
-    /* Table 15 */
-    uint qidx   = clamp(bs[1].v, 1, 224),
-         qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
-
+    uint hdr_size, qidx, y_size, u_size, v_size, a_size;
+    hdr_size = bs[0].v >> 3, qidx = clamp(bs[1].v, 1, 224);
     y_size = (uint(bs[2].v) << 8) | bs[3].v;
     u_size = (uint(bs[4].v) << 8) | bs[5].v;
 
@@ -308,10 +303,17 @@ void main(void)
     uint mb_count = 1 << log2_width;
 
     if (gid.z < 3) {
-        /* Color entropy decoding, inverse scanning, first part of inverse 
quantization */
-        decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+        /* Color entropy decoding, inverse scanning */
+        decode_comp(gb, uvec2(mb_x, mb_y), mb_count);
     } else {
         /* Alpha entropy decoding */
         decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
     }
+
+    /* Forward the quantization index to the IDCT shader */
+    if (gid.z == 0) {
+        uint base = mb_y * mb_width + mb_x;
+        for (uint i = 0; i < mb_count; ++i)
+            quant_idx[base + i] = uint8_t(qidx);
+    }
 }
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 2602be112b..8849e337c3 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -37,11 +37,13 @@ const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc = {
 typedef struct ProresVulkanDecodePicture {
     FFVulkanDecodePicture vp;
 
-    AVBufferRef *slice_offset_buf;
-    uint32_t slice_num;
+    AVBufferRef *metadata_buf;
 
     uint32_t bitstream_start;
     uint32_t bitstream_size;
+    uint32_t slice_num;
+
+    uint32_t slice_offsets_sz, mb_params_sz;
 } ProresVulkanDecodePicture;
 
 typedef struct ProresVulkanDecodeContext {
@@ -51,7 +53,7 @@ typedef struct ProresVulkanDecodeContext {
         FFVulkanShader idct;
     } shaders[2]; /* Progressive/interlaced */
 
-    AVBufferPool *slice_offset_pool;
+    AVBufferPool *metadata_pool;
 } ProresVulkanDecodeContext;
 
 typedef struct ProresVkParameters {
@@ -88,6 +90,9 @@ static int vk_prores_start_frame(AVCodecContext          
*avctx,
 
     int err;
 
+    pp->slice_offsets_sz = (pr->slice_count + 1) * sizeof(uint32_t);
+    pp->mb_params_sz     = pr->mb_width * pr->mb_height * sizeof(uint8_t);
+
     /* Host map the input slices data if supported */
     if (!vp->slices_buf && ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
         RET(ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
@@ -96,11 +101,10 @@ static int vk_prores_start_frame(AVCodecContext          
*avctx,
                                   VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
 
     /* Allocate slice offsets buffer */
-    RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
-                                &pp->slice_offset_buf,
-                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
-                                NULL, (pr->slice_count + 1) * sizeof(uint32_t),
+    RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->metadata_pool,
+                                &pp->metadata_buf,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                                NULL, pp->slice_offsets_sz + pp->mb_params_sz,
                                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
 
@@ -123,7 +127,7 @@ static int vk_prores_decode_slice(AVCodecContext *avctx,
     ProresVulkanDecodePicture *pp = pr->hwaccel_picture_private;
     FFVulkanDecodePicture     *vp = &pp->vp;
 
-    FFVkBuffer *slice_offset = (FFVkBuffer *)pp->slice_offset_buf->data;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)pp->metadata_buf->data;
     FFVkBuffer *slices_buf   = vp->slices_buf ? (FFVkBuffer 
*)vp->slices_buf->data : NULL;
 
     /* Skip picture header */
@@ -158,7 +162,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     FFVulkanDecodePicture     *vp = &pp->vp;
 
     ProresVkParameters pd;
-    FFVkBuffer *slice_data, *slice_offsets;
+    FFVkBuffer *slice_data, *metadata;
     struct ProresVulkanShaderVariants *shaders;
     VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
     VkBufferMemoryBarrier2 buf_bar[2];
@@ -172,8 +176,8 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     if (!pix_desc)
         return AVERROR(EINVAL);
 
-    slice_data    = (FFVkBuffer *)vp->slices_buf->data;
-    slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+    slice_data = (FFVkBuffer *)vp->slices_buf->data;
+    metadata   = (FFVkBuffer *)pp->metadata_buf->data;
 
     shaders = &pv->shaders[pr->frame_type != 0];
 
@@ -209,13 +213,13 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                                     pr->frame));
 
     RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
-                               (AVBufferRef *[]){ vp->slices_buf, 
pp->slice_offset_buf },
+                               (AVBufferRef *[]){ vp->slices_buf, 
pp->metadata_buf, },
                                2, 0));
 
     /* Transfer ownership to the exec context */
-    vp->slices_buf = pp->slice_offset_buf = NULL;
+    vp->slices_buf = pp->metadata_buf = NULL;
 
-    /* Input frame barrier */
+    /* Input barrier */
     ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                         VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -223,6 +227,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                         VK_IMAGE_LAYOUT_GENERAL,
                         VK_QUEUE_FAMILY_IGNORED);
 
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask        = metadata->stage,
+        .dstStageMask        = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask       = metadata->access,
+        .dstAccessMask       = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer              = metadata->buf,
+        .offset              = pp->slice_offsets_sz,
+        .size                = pp->mb_params_sz,
+    };
+    metadata->stage  = buf_bar[0].dstStageMask;
+    metadata->access = buf_bar[0].dstAccessMask;
+
     vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
         .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
         .pBufferMemoryBarriers    = buf_bar,
@@ -267,12 +286,17 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     /* Entropy decode */
     ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
                                     0, 0, 0,
-                                    slice_offsets,
-                                    0, (pp->slice_num + 1) * sizeof(uint32_t),
+                                    metadata, 0,
+                                    pp->slice_offsets_sz,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+                                    0, 1, 0,
+                                    metadata, pp->slice_offsets_sz,
+                                    pp->mb_params_sz,
                                     VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
                                   pr->frame, vp->view.out,
-                                  0, 1,
+                                  0, 2,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
 
@@ -286,7 +310,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                     3 + !!pr->alpha_info);
 
     /* Synchronize vld and idct shaders */
-    nb_img_bar = 0;
     ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -294,6 +317,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                         VK_IMAGE_LAYOUT_GENERAL,
                         VK_QUEUE_FAMILY_IGNORED);
 
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask        = metadata->stage,
+        .dstStageMask        = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask       = metadata->access,
+        .dstAccessMask       = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer              = metadata->buf,
+        .offset              = pp->slice_offsets_sz,
+        .size                = pp->mb_params_sz,
+    };
+    metadata->stage  = buf_bar[0].dstStageMask;
+    metadata->access = buf_bar[0].dstAccessMask;
+
     vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
         .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
         .pBufferMemoryBarriers    = buf_bar,
@@ -304,9 +342,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     nb_img_bar = nb_buf_bar = 0;
 
     /* Inverse transform */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->idct,
+                                    0, 0, 0,
+                                    metadata, pp->slice_offsets_sz,
+                                    pp->mb_params_sz,
+                                    VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
                                   pr->frame, vp->view.out,
-                                  0, 0,
+                                  0, 1,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
 
@@ -406,23 +449,23 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared 
*ctx)
         ff_vk_shader_free(&ctx->s, &pv->shaders[i].idct);
     }
 
-    av_buffer_pool_uninit(&pv->slice_offset_pool);
+    av_buffer_pool_uninit(&pv->metadata_pool);
 
     av_freep(&pv);
 }
 
 static int vk_decode_prores_init(AVCodecContext *avctx)
 {
-    FFVulkanDecodeContext        *dec = avctx->internal->hwaccel_priv_data;
-    FFVulkanDecodeShared         *ctx = NULL;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared  *ctx = NULL;
 
     AVHWFramesContext *out_frames_ctx;
     ProresVulkanDecodeContext *pv;
     FFVkSPIRVCompiler *spv;
     FFVulkanDescriptorSetBinding *desc_set;
-    int max_num_slices, i, err;
+    int max_num_mbs, i, err;
 
-    max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
+    max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
 
     spv = ff_vk_spirv_init();
     if (!spv) {
@@ -471,7 +514,15 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
                 .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
                 .mem_quali   = "readonly",
                 .buf_content = "uint32_t slice_offsets",
-                .buf_elems   = max_num_slices + 1,
+                .buf_elems   = max_num_mbs + 1,
+            },
+            {
+                .name        = "quant_idx_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "writeonly",
+                .buf_content = "uint8_t quant_idx",
+                .buf_elems   = max_num_mbs,
             },
             {
                 .name       = "dst",
@@ -485,10 +536,18 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
             },
         };
         RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
-                        "prores_dec_vld", "main", desc_set, 2,
+                        "prores_dec_vld", "main", desc_set, 3,
                         ff_source_prores_vld_comp, 0x080801, i));
 
         desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name        = "quant_idx_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "readonly",
+                .buf_content = "uint8_t quant_idx",
+                .buf_elems   = max_num_mbs,
+            },
             {
                 .name       = "dst",
                 .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -500,7 +559,7 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
             },
         };
         RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
-                        "prores_dec_idct", "main", desc_set, 1,
+                        "prores_dec_idct", "main", desc_set, 2,
                         ff_source_prores_idct_comp, 0x200201, i));
     }
 

-----------------------------------------------------------------------

Summary of changes:
 libavcodec/vulkan/prores_idct.comp |  13 +++--
 libavcodec/vulkan/prores_vld.comp  |  34 +++++------
 libavcodec/vulkan_prores.c         | 115 ++++++++++++++++++++++++++++---------
 3 files changed, 114 insertions(+), 48 deletions(-)


hooks/post-receive
-- 

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to